(module
 ;; Test fixture: one exported wrapper per SIMD instruction. Each wrapper passes
 ;; its parameters straight to the instruction named by the export and returns
 ;; that instruction's result.

 ;; One page of linear memory, used by the load/store wrappers.
 (memory 1)
 ;; Sixteen ASCII bytes at byte offset 128.
 (data (i32.const 128) "WASMSIMDGOESFAST")
 ;; Eight bytes at byte offset 256, each with its top bit set.
 (data (i32.const 256) "\80\90\a0\b0\c0\d0\e0\f0")

 ;; --- Whole-vector load / store ---
 (func (export "v128.load") (param $addr i32) (result v128) (v128.load (local.get $addr)))
 ;; Stores $vec at $addr, then reloads from the same address and returns it.
 (func (export "v128.store") (param $addr i32) (param $vec v128) (result v128)
   (v128.store offset=0 align=16 (local.get $addr) (local.get $vec))
   (v128.load (local.get $addr))
 )

 ;; --- Constants, one per lane shape ---
 (func (export "v128.const.i8x16") (result v128)
   (v128.const i8x16 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16)
 )
 (func (export "v128.const.i16x8") (result v128)
   (v128.const i16x8 1 2 3 4 5 6 7 8)
 )
 (func (export "v128.const.i32x4") (result v128)
   (v128.const i32x4 1 2 3 4)
 )
 (func (export "v128.const.i64x2") (result v128)
   (v128.const i64x2 1 2)
 )
 (func (export "v128.const.f32x4") (result v128)
   (v128.const f32x4 1.0 2 3 4)
 )
 (func (export "v128.const.f64x2") (result v128)
   (v128.const f64x2 1.0 2)
 )

 ;; --- Byte shuffles with fixed lane indices ---
 ;; Indices 0-15 pick from the first operand, 16-31 from the second.
 (func (export "v128.shuffle_interleave_bytes") (param $lhs v128) (param $rhs v128) (result v128)
   (v8x16.shuffle 0 17 2 19 4 21 6 23 8 25 10 27 12 29 14 31 (local.get $lhs) (local.get $rhs))
 )
 ;; Same vector supplied as both operands; only indices 0-15 are used.
 (func (export "v128.shuffle_reverse_i32s") (param $vec v128) (result v128)
   (v8x16.shuffle 12 13 14 15 8 9 10 11 4 5 6 7 0 1 2 3 (local.get $vec) (local.get $vec))
 )

 ;; --- i8x16 splat and first/last lane access ---
 (func (export "i8x16.splat") (param $val i32) (result v128) (i8x16.splat (local.get $val)))
 (func (export "i8x16.extract_lane_s_first") (param $vec v128) (result i32) (i8x16.extract_lane_s 0 (local.get $vec)))
 (func (export "i8x16.extract_lane_s_last") (param $vec v128) (result i32) (i8x16.extract_lane_s 15 (local.get $vec)))
 (func (export "i8x16.extract_lane_u_first") (param $vec v128) (result i32) (i8x16.extract_lane_u 0 (local.get $vec)))
 (func (export "i8x16.extract_lane_u_last") (param $vec v128) (result i32) (i8x16.extract_lane_u 15 (local.get $vec)))
 (func (export "i8x16.replace_lane_first") (param $vec v128) (param $lane i32) (result v128) (i8x16.replace_lane 0 (local.get $vec) (local.get $lane)))
 (func (export "i8x16.replace_lane_last") (param $vec v128) (param $lane i32) (result v128) (i8x16.replace_lane 15 (local.get $vec) (local.get $lane)))

 ;; --- i16x8 splat and first/last lane access ---
 (func (export "i16x8.splat") (param $val i32) (result v128) (i16x8.splat (local.get $val)))
 (func (export "i16x8.extract_lane_s_first") (param $vec v128) (result i32) (i16x8.extract_lane_s 0 (local.get $vec)))
 (func (export "i16x8.extract_lane_s_last") (param $vec v128) (result i32) (i16x8.extract_lane_s 7 (local.get $vec)))
 (func (export "i16x8.extract_lane_u_first") (param $vec v128) (result i32) (i16x8.extract_lane_u 0 (local.get $vec)))
 (func (export "i16x8.extract_lane_u_last") (param $vec v128) (result i32) (i16x8.extract_lane_u 7 (local.get $vec)))
 (func (export "i16x8.replace_lane_first") (param $vec v128) (param $lane i32) (result v128) (i16x8.replace_lane 0 (local.get $vec) (local.get $lane)))
 (func (export "i16x8.replace_lane_last") (param $vec v128) (param $lane i32) (result v128) (i16x8.replace_lane 7 (local.get $vec) (local.get $lane)))

 ;; --- i32x4 splat and first/last lane access ---
 (func (export "i32x4.splat") (param $val i32) (result v128) (i32x4.splat (local.get $val)))
 (func (export "i32x4.extract_lane_first") (param $vec v128) (result i32) (i32x4.extract_lane 0 (local.get $vec)))
 (func (export "i32x4.extract_lane_last") (param $vec v128) (result i32) (i32x4.extract_lane 3 (local.get $vec)))
 (func (export "i32x4.replace_lane_first") (param $vec v128) (param $lane i32) (result v128) (i32x4.replace_lane 0 (local.get $vec) (local.get $lane)))
 (func (export "i32x4.replace_lane_last") (param $vec v128) (param $lane i32) (result v128) (i32x4.replace_lane 3 (local.get $vec) (local.get $lane)))

 ;; --- i64x2 splat and first/last lane access ---
 (func (export "i64x2.splat") (param $val i64) (result v128) (i64x2.splat (local.get $val)))
 (func (export "i64x2.extract_lane_first") (param $vec v128) (result i64) (i64x2.extract_lane 0 (local.get $vec)))
 (func (export "i64x2.extract_lane_last") (param $vec v128) (result i64) (i64x2.extract_lane 1 (local.get $vec)))
 (func (export "i64x2.replace_lane_first") (param $vec v128) (param $lane i64) (result v128) (i64x2.replace_lane 0 (local.get $vec) (local.get $lane)))
 (func (export "i64x2.replace_lane_last") (param $vec v128) (param $lane i64) (result v128) (i64x2.replace_lane 1 (local.get $vec) (local.get $lane)))

 ;; --- f32x4 splat and first/last lane access ---
 (func (export "f32x4.splat") (param $val f32) (result v128) (f32x4.splat (local.get $val)))
 (func (export "f32x4.extract_lane_first") (param $vec v128) (result f32) (f32x4.extract_lane 0 (local.get $vec)))
 (func (export "f32x4.extract_lane_last") (param $vec v128) (result f32) (f32x4.extract_lane 3 (local.get $vec)))
 (func (export "f32x4.replace_lane_first") (param $vec v128) (param $lane f32) (result v128) (f32x4.replace_lane 0 (local.get $vec) (local.get $lane)))
 (func (export "f32x4.replace_lane_last") (param $vec v128) (param $lane f32) (result v128) (f32x4.replace_lane 3 (local.get $vec) (local.get $lane)))

 ;; --- f64x2 splat and first/last lane access ---
 (func (export "f64x2.splat") (param $val f64) (result v128) (f64x2.splat (local.get $val)))
 (func (export "f64x2.extract_lane_first") (param $vec v128) (result f64) (f64x2.extract_lane 0 (local.get $vec)))
 (func (export "f64x2.extract_lane_last") (param $vec v128) (result f64) (f64x2.extract_lane 1 (local.get $vec)))
 (func (export "f64x2.replace_lane_first") (param $vec v128) (param $lane f64) (result v128) (f64x2.replace_lane 0 (local.get $vec) (local.get $lane)))
 (func (export "f64x2.replace_lane_last") (param $vec v128) (param $lane f64) (result v128) (f64x2.replace_lane 1 (local.get $vec) (local.get $lane)))

 ;; --- i8x16 comparisons ---
 (func (export "i8x16.eq") (param $lhs v128) (param $rhs v128) (result v128) (i8x16.eq (local.get $lhs) (local.get $rhs)))
 (func (export "i8x16.ne") (param $lhs v128) (param $rhs v128) (result v128) (i8x16.ne (local.get $lhs) (local.get $rhs)))
 (func (export "i8x16.lt_s") (param $lhs v128) (param $rhs v128) (result v128) (i8x16.lt_s (local.get $lhs) (local.get $rhs)))
 (func (export "i8x16.lt_u") (param $lhs v128) (param $rhs v128) (result v128) (i8x16.lt_u (local.get $lhs) (local.get $rhs)))
 (func (export "i8x16.gt_s") (param $lhs v128) (param $rhs v128) (result v128) (i8x16.gt_s (local.get $lhs) (local.get $rhs)))
 (func (export "i8x16.gt_u") (param $lhs v128) (param $rhs v128) (result v128) (i8x16.gt_u (local.get $lhs) (local.get $rhs)))
 (func (export "i8x16.le_s") (param $lhs v128) (param $rhs v128) (result v128) (i8x16.le_s (local.get $lhs) (local.get $rhs)))
 (func (export "i8x16.le_u") (param $lhs v128) (param $rhs v128) (result v128) (i8x16.le_u (local.get $lhs) (local.get $rhs)))
 (func (export "i8x16.ge_s") (param $lhs v128) (param $rhs v128) (result v128) (i8x16.ge_s (local.get $lhs) (local.get $rhs)))
 (func (export "i8x16.ge_u") (param $lhs v128) (param $rhs v128) (result v128) (i8x16.ge_u (local.get $lhs) (local.get $rhs)))

 ;; --- i16x8 comparisons ---
 (func (export "i16x8.eq") (param $lhs v128) (param $rhs v128) (result v128) (i16x8.eq (local.get $lhs) (local.get $rhs)))
 (func (export "i16x8.ne") (param $lhs v128) (param $rhs v128) (result v128) (i16x8.ne (local.get $lhs) (local.get $rhs)))
 (func (export "i16x8.lt_s") (param $lhs v128) (param $rhs v128) (result v128) (i16x8.lt_s (local.get $lhs) (local.get $rhs)))
 (func (export "i16x8.lt_u") (param $lhs v128) (param $rhs v128) (result v128) (i16x8.lt_u (local.get $lhs) (local.get $rhs)))
 (func (export "i16x8.gt_s") (param $lhs v128) (param $rhs v128) (result v128) (i16x8.gt_s (local.get $lhs) (local.get $rhs)))
 (func (export "i16x8.gt_u") (param $lhs v128) (param $rhs v128) (result v128) (i16x8.gt_u (local.get $lhs) (local.get $rhs)))
 (func (export "i16x8.le_s") (param $lhs v128) (param $rhs v128) (result v128) (i16x8.le_s (local.get $lhs) (local.get $rhs)))
 (func (export "i16x8.le_u") (param $lhs v128) (param $rhs v128) (result v128) (i16x8.le_u (local.get $lhs) (local.get $rhs)))
 (func (export "i16x8.ge_s") (param $lhs v128) (param $rhs v128) (result v128) (i16x8.ge_s (local.get $lhs) (local.get $rhs)))
 (func (export "i16x8.ge_u") (param $lhs v128) (param $rhs v128) (result v128) (i16x8.ge_u (local.get $lhs) (local.get $rhs)))

 ;; --- i32x4 comparisons ---
 (func (export "i32x4.eq") (param $lhs v128) (param $rhs v128) (result v128) (i32x4.eq (local.get $lhs) (local.get $rhs)))
 (func (export "i32x4.ne") (param $lhs v128) (param $rhs v128) (result v128) (i32x4.ne (local.get $lhs) (local.get $rhs)))
 (func (export "i32x4.lt_s") (param $lhs v128) (param $rhs v128) (result v128) (i32x4.lt_s (local.get $lhs) (local.get $rhs)))
 (func (export "i32x4.lt_u") (param $lhs v128) (param $rhs v128) (result v128) (i32x4.lt_u (local.get $lhs) (local.get $rhs)))
 (func (export "i32x4.gt_s") (param $lhs v128) (param $rhs v128) (result v128) (i32x4.gt_s (local.get $lhs) (local.get $rhs)))
 (func (export "i32x4.gt_u") (param $lhs v128) (param $rhs v128) (result v128) (i32x4.gt_u (local.get $lhs) (local.get $rhs)))
 (func (export "i32x4.le_s") (param $lhs v128) (param $rhs v128) (result v128) (i32x4.le_s (local.get $lhs) (local.get $rhs)))
 (func (export "i32x4.le_u") (param $lhs v128) (param $rhs v128) (result v128) (i32x4.le_u (local.get $lhs) (local.get $rhs)))
 (func (export "i32x4.ge_s") (param $lhs v128) (param $rhs v128) (result v128) (i32x4.ge_s (local.get $lhs) (local.get $rhs)))
 (func (export "i32x4.ge_u") (param $lhs v128) (param $rhs v128) (result v128) (i32x4.ge_u (local.get $lhs) (local.get $rhs)))

 ;; --- f32x4 comparisons ---
 (func (export "f32x4.eq") (param $lhs v128) (param $rhs v128) (result v128) (f32x4.eq (local.get $lhs) (local.get $rhs)))
 (func (export "f32x4.ne") (param $lhs v128) (param $rhs v128) (result v128) (f32x4.ne (local.get $lhs) (local.get $rhs)))
 (func (export "f32x4.lt") (param $lhs v128) (param $rhs v128) (result v128) (f32x4.lt (local.get $lhs) (local.get $rhs)))
 (func (export "f32x4.gt") (param $lhs v128) (param $rhs v128) (result v128) (f32x4.gt (local.get $lhs) (local.get $rhs)))
 (func (export "f32x4.le") (param $lhs v128) (param $rhs v128) (result v128) (f32x4.le (local.get $lhs) (local.get $rhs)))
 (func (export "f32x4.ge") (param $lhs v128) (param $rhs v128) (result v128) (f32x4.ge (local.get $lhs) (local.get $rhs)))

 ;; --- f64x2 comparisons ---
 (func (export "f64x2.eq") (param $lhs v128) (param $rhs v128) (result v128) (f64x2.eq (local.get $lhs) (local.get $rhs)))
 (func (export "f64x2.ne") (param $lhs v128) (param $rhs v128) (result v128) (f64x2.ne (local.get $lhs) (local.get $rhs)))
 (func (export "f64x2.lt") (param $lhs v128) (param $rhs v128) (result v128) (f64x2.lt (local.get $lhs) (local.get $rhs)))
 (func (export "f64x2.gt") (param $lhs v128) (param $rhs v128) (result v128) (f64x2.gt (local.get $lhs) (local.get $rhs)))
 (func (export "f64x2.le") (param $lhs v128) (param $rhs v128) (result v128) (f64x2.le (local.get $lhs) (local.get $rhs)))
 (func (export "f64x2.ge") (param $lhs v128) (param $rhs v128) (result v128) (f64x2.ge (local.get $lhs) (local.get $rhs)))

 ;; --- Whole-vector bitwise operations ---
 (func (export "v128.not") (param $vec v128) (result v128) (v128.not (local.get $vec)))
 (func (export "v128.and") (param $lhs v128) (param $rhs v128) (result v128) (v128.and (local.get $lhs) (local.get $rhs)))
 (func (export "v128.or") (param $lhs v128) (param $rhs v128) (result v128) (v128.or (local.get $lhs) (local.get $rhs)))
 (func (export "v128.xor") (param $lhs v128) (param $rhs v128) (result v128) (v128.xor (local.get $lhs) (local.get $rhs)))
 (func (export "v128.andnot") (param $lhs v128) (param $rhs v128) (result v128) (v128.andnot (local.get $lhs) (local.get $rhs)))
 (func (export "v128.bitselect") (param $a v128) (param $b v128) (param $c v128) (result v128)
   (v128.bitselect (local.get $a) (local.get $b) (local.get $c))
 )

 ;; --- i8x16 arithmetic, reductions and shifts ---
 (func (export "i8x16.abs") (param $vec v128) (result v128) (i8x16.abs (local.get $vec)))
 (func (export "i8x16.neg") (param $vec v128) (result v128) (i8x16.neg (local.get $vec)))
 (func (export "i8x16.any_true") (param $vec v128) (result i32) (i8x16.any_true (local.get $vec)))
 (func (export "i8x16.all_true") (param $vec v128) (result i32) (i8x16.all_true (local.get $vec)))
 (func (export "i8x16.bitmask") (param $vec v128) (result i32) (i8x16.bitmask (local.get $vec)))
 (func (export "i8x16.shl") (param $vec v128) (param $count i32) (result v128) (i8x16.shl (local.get $vec) (local.get $count)))
 (func (export "i8x16.shr_s") (param $vec v128) (param $count i32) (result v128) (i8x16.shr_s (local.get $vec) (local.get $count)))
 (func (export "i8x16.shr_u") (param $vec v128) (param $count i32) (result v128) (i8x16.shr_u (local.get $vec) (local.get $count)))
 (func (export "i8x16.add") (param $lhs v128) (param $rhs v128) (result v128) (i8x16.add (local.get $lhs) (local.get $rhs)))
 (func (export "i8x16.add_saturate_s") (param $lhs v128) (param $rhs v128) (result v128) (i8x16.add_saturate_s (local.get $lhs) (local.get $rhs)))
 (func (export "i8x16.add_saturate_u") (param $lhs v128) (param $rhs v128) (result v128) (i8x16.add_saturate_u (local.get $lhs) (local.get $rhs)))
 (func (export "i8x16.sub") (param $lhs v128) (param $rhs v128) (result v128) (i8x16.sub (local.get $lhs) (local.get $rhs)))
 (func (export "i8x16.sub_saturate_s") (param $lhs v128) (param $rhs v128) (result v128) (i8x16.sub_saturate_s (local.get $lhs) (local.get $rhs)))
 (func (export "i8x16.sub_saturate_u") (param $lhs v128) (param $rhs v128) (result v128) (i8x16.sub_saturate_u (local.get $lhs) (local.get $rhs)))
 (func (export "i8x16.mul") (param $lhs v128) (param $rhs v128) (result v128) (i8x16.mul (local.get $lhs) (local.get $rhs)))
 (func (export "i8x16.min_s") (param $lhs v128) (param $rhs v128) (result v128) (i8x16.min_s (local.get $lhs) (local.get $rhs)))
 (func (export "i8x16.min_u") (param $lhs v128) (param $rhs v128) (result v128) (i8x16.min_u (local.get $lhs) (local.get $rhs)))
 (func (export "i8x16.max_s") (param $lhs v128) (param $rhs v128) (result v128) (i8x16.max_s (local.get $lhs) (local.get $rhs)))
 (func (export "i8x16.max_u") (param $lhs v128) (param $rhs v128) (result v128) (i8x16.max_u (local.get $lhs) (local.get $rhs)))
 (func (export "i8x16.avgr_u") (param $lhs v128) (param $rhs v128) (result v128) (i8x16.avgr_u (local.get $lhs) (local.get $rhs)))

 ;; --- i16x8 arithmetic, reductions and shifts ---
 (func (export "i16x8.abs") (param $vec v128) (result v128) (i16x8.abs (local.get $vec)))
 (func (export "i16x8.neg") (param $vec v128) (result v128) (i16x8.neg (local.get $vec)))
 (func (export "i16x8.any_true") (param $vec v128) (result i32) (i16x8.any_true (local.get $vec)))
 (func (export "i16x8.all_true") (param $vec v128) (result i32) (i16x8.all_true (local.get $vec)))
 (func (export "i16x8.bitmask") (param $vec v128) (result i32) (i16x8.bitmask (local.get $vec)))
 (func (export "i16x8.shl") (param $vec v128) (param $count i32) (result v128) (i16x8.shl (local.get $vec) (local.get $count)))
 (func (export "i16x8.shr_s") (param $vec v128) (param $count i32) (result v128) (i16x8.shr_s (local.get $vec) (local.get $count)))
 (func (export "i16x8.shr_u") (param $vec v128) (param $count i32) (result v128) (i16x8.shr_u (local.get $vec) (local.get $count)))
 (func (export "i16x8.add") (param $lhs v128) (param $rhs v128) (result v128) (i16x8.add (local.get $lhs) (local.get $rhs)))
 (func (export "i16x8.add_saturate_s") (param $lhs v128) (param $rhs v128) (result v128) (i16x8.add_saturate_s (local.get $lhs) (local.get $rhs)))
 (func (export "i16x8.add_saturate_u") (param $lhs v128) (param $rhs v128) (result v128) (i16x8.add_saturate_u (local.get $lhs) (local.get $rhs)))
 (func (export "i16x8.sub") (param $lhs v128) (param $rhs v128) (result v128) (i16x8.sub (local.get $lhs) (local.get $rhs)))
 (func (export "i16x8.sub_saturate_s") (param $lhs v128) (param $rhs v128) (result v128) (i16x8.sub_saturate_s (local.get $lhs) (local.get $rhs)))
 (func (export "i16x8.sub_saturate_u") (param $lhs v128) (param $rhs v128) (result v128) (i16x8.sub_saturate_u (local.get $lhs) (local.get $rhs)))
 (func (export "i16x8.mul") (param $lhs v128) (param $rhs v128) (result v128) (i16x8.mul (local.get $lhs) (local.get $rhs)))
 (func (export "i16x8.min_s") (param $lhs v128) (param $rhs v128) (result v128) (i16x8.min_s (local.get $lhs) (local.get $rhs)))
 (func (export "i16x8.min_u") (param $lhs v128) (param $rhs v128) (result v128) (i16x8.min_u (local.get $lhs) (local.get $rhs)))
 (func (export "i16x8.max_s") (param $lhs v128) (param $rhs v128) (result v128) (i16x8.max_s (local.get $lhs) (local.get $rhs)))
 (func (export "i16x8.max_u") (param $lhs v128) (param $rhs v128) (result v128) (i16x8.max_u (local.get $lhs) (local.get $rhs)))
 (func (export "i16x8.avgr_u") (param $lhs v128) (param $rhs v128) (result v128) (i16x8.avgr_u (local.get $lhs) (local.get $rhs)))

 ;; --- i32x4 arithmetic, reductions and shifts ---
 (func (export "i32x4.abs") (param $vec v128) (result v128) (i32x4.abs (local.get $vec)))
 (func (export "i32x4.neg") (param $vec v128) (result v128) (i32x4.neg (local.get $vec)))
 (func (export "i32x4.any_true") (param $vec v128) (result i32) (i32x4.any_true (local.get $vec)))
 (func (export "i32x4.all_true") (param $vec v128) (result i32) (i32x4.all_true (local.get $vec)))
 (func (export "i32x4.bitmask") (param $vec v128) (result i32) (i32x4.bitmask (local.get $vec)))
 (func (export "i32x4.shl") (param $vec v128) (param $count i32) (result v128) (i32x4.shl (local.get $vec) (local.get $count)))
 (func (export "i32x4.shr_s") (param $vec v128) (param $count i32) (result v128) (i32x4.shr_s (local.get $vec) (local.get $count)))
 (func (export "i32x4.shr_u") (param $vec v128) (param $count i32) (result v128) (i32x4.shr_u (local.get $vec) (local.get $count)))
 (func (export "i32x4.add") (param $lhs v128) (param $rhs v128) (result v128) (i32x4.add (local.get $lhs) (local.get $rhs)))
 (func (export "i32x4.sub") (param $lhs v128) (param $rhs v128) (result v128) (i32x4.sub (local.get $lhs) (local.get $rhs)))
 (func (export "i32x4.mul") (param $lhs v128) (param $rhs v128) (result v128) (i32x4.mul (local.get $lhs) (local.get $rhs)))
 (func (export "i32x4.min_s") (param $lhs v128) (param $rhs v128) (result v128) (i32x4.min_s (local.get $lhs) (local.get $rhs)))
 (func (export "i32x4.min_u") (param $lhs v128) (param $rhs v128) (result v128) (i32x4.min_u (local.get $lhs) (local.get $rhs)))
 (func (export "i32x4.max_s") (param $lhs v128) (param $rhs v128) (result v128) (i32x4.max_s (local.get $lhs) (local.get $rhs)))
 (func (export "i32x4.max_u") (param $lhs v128) (param $rhs v128) (result v128) (i32x4.max_u (local.get $lhs) (local.get $rhs)))
 (func (export "i32x4.dot_i16x8_s") (param $lhs v128) (param $rhs v128) (result v128) (i32x4.dot_i16x8_s (local.get $lhs) (local.get $rhs)))

 ;; --- i64x2 arithmetic, reductions and shifts ---
 (func (export "i64x2.neg") (param $vec v128) (result v128) (i64x2.neg (local.get $vec)))
 (func (export "i64x2.any_true") (param $vec v128) (result i32) (i64x2.any_true (local.get $vec)))
 (func (export "i64x2.all_true") (param $vec v128) (result i32) (i64x2.all_true (local.get $vec)))
 (func (export "i64x2.shl") (param $vec v128) (param $count i32) (result v128) (i64x2.shl (local.get $vec) (local.get $count)))
 (func (export "i64x2.shr_s") (param $vec v128) (param $count i32) (result v128) (i64x2.shr_s (local.get $vec) (local.get $count)))
 (func (export "i64x2.shr_u") (param $vec v128) (param $count i32) (result v128) (i64x2.shr_u (local.get $vec) (local.get $count)))
 (func (export "i64x2.add") (param $lhs v128) (param $rhs v128) (result v128) (i64x2.add (local.get $lhs) (local.get $rhs)))
 (func (export "i64x2.sub") (param $lhs v128) (param $rhs v128) (result v128) (i64x2.sub (local.get $lhs) (local.get $rhs)))
 (func (export "i64x2.mul") (param $lhs v128) (param $rhs v128) (result v128) (i64x2.mul (local.get $lhs) (local.get $rhs)))

 ;; --- f32x4 arithmetic and rounding ---
 (func (export "f32x4.abs") (param $vec v128) (result v128) (f32x4.abs (local.get $vec)))
 (func (export "f32x4.neg") (param $vec v128) (result v128) (f32x4.neg (local.get $vec)))
 (func (export "f32x4.sqrt") (param $vec v128) (result v128) (f32x4.sqrt (local.get $vec)))
 (func (export "f32x4.qfma") (param $a v128) (param $b v128) (param $c v128) (result v128) (f32x4.qfma (local.get $a) (local.get $b) (local.get $c)))
 (func (export "f32x4.qfms") (param $a v128) (param $b v128) (param $c v128) (result v128) (f32x4.qfms (local.get $a) (local.get $b) (local.get $c)))
 (func (export "f32x4.add") (param $lhs v128) (param $rhs v128) (result v128) (f32x4.add (local.get $lhs) (local.get $rhs)))
 (func (export "f32x4.sub") (param $lhs v128) (param $rhs v128) (result v128) (f32x4.sub (local.get $lhs) (local.get $rhs)))
 (func (export "f32x4.mul") (param $lhs v128) (param $rhs v128) (result v128) (f32x4.mul (local.get $lhs) (local.get $rhs)))
 (func (export "f32x4.div") (param $lhs v128) (param $rhs v128) (result v128) (f32x4.div (local.get $lhs) (local.get $rhs)))
 (func (export "f32x4.min") (param $lhs v128) (param $rhs v128) (result v128) (f32x4.min (local.get $lhs) (local.get $rhs)))
 (func (export "f32x4.max") (param $lhs v128) (param $rhs v128) (result v128) (f32x4.max (local.get $lhs) (local.get $rhs)))
 (func (export "f32x4.pmin") (param $lhs v128) (param $rhs v128) (result v128) (f32x4.pmin (local.get $lhs) (local.get $rhs)))
 (func (export "f32x4.pmax") (param $lhs v128) (param $rhs v128) (result v128) (f32x4.pmax (local.get $lhs) (local.get $rhs)))
 (func (export "f32x4.ceil") (param $vec v128) (result v128) (f32x4.ceil (local.get $vec)))
 (func (export "f32x4.floor") (param $vec v128) (result v128) (f32x4.floor (local.get $vec)))
 (func (export "f32x4.trunc") (param $vec v128) (result v128) (f32x4.trunc (local.get $vec)))
 (func (export "f32x4.nearest") (param $vec v128) (result v128) (f32x4.nearest (local.get $vec)))

 ;; --- f64x2 arithmetic and rounding ---
 (func (export "f64x2.abs") (param $vec v128) (result v128) (f64x2.abs (local.get $vec)))
 (func (export "f64x2.neg") (param $vec v128) (result v128) (f64x2.neg (local.get $vec)))
 (func (export "f64x2.sqrt") (param $vec v128) (result v128) (f64x2.sqrt (local.get $vec)))
 (func (export "f64x2.qfma") (param $a v128) (param $b v128) (param $c v128) (result v128) (f64x2.qfma (local.get $a) (local.get $b) (local.get $c)))
 (func (export "f64x2.qfms") (param $a v128) (param $b v128) (param $c v128) (result v128) (f64x2.qfms (local.get $a) (local.get $b) (local.get $c)))
 (func (export "f64x2.add") (param $lhs v128) (param $rhs v128) (result v128) (f64x2.add (local.get $lhs) (local.get $rhs)))
 (func (export "f64x2.sub") (param $lhs v128) (param $rhs v128) (result v128) (f64x2.sub (local.get $lhs) (local.get $rhs)))
 (func (export "f64x2.mul") (param $lhs v128) (param $rhs v128) (result v128) (f64x2.mul (local.get $lhs) (local.get $rhs)))
 (func (export "f64x2.div") (param $lhs v128) (param $rhs v128) (result v128) (f64x2.div (local.get $lhs) (local.get $rhs)))
 (func (export "f64x2.min") (param $lhs v128) (param $rhs v128) (result v128) (f64x2.min (local.get $lhs) (local.get $rhs)))
 (func (export "f64x2.max") (param $lhs v128) (param $rhs v128) (result v128) (f64x2.max (local.get $lhs) (local.get $rhs)))
 (func (export "f64x2.pmin") (param $lhs v128) (param $rhs v128) (result v128) (f64x2.pmin (local.get $lhs) (local.get $rhs)))
 (func (export "f64x2.pmax") (param $lhs v128) (param $rhs v128) (result v128) (f64x2.pmax (local.get $lhs) (local.get $rhs)))
 (func (export "f64x2.ceil") (param $vec v128) (result v128) (f64x2.ceil (local.get $vec)))
 (func (export "f64x2.floor") (param $vec v128) (result v128) (f64x2.floor (local.get $vec)))
 (func (export "f64x2.trunc") (param $vec v128) (result v128) (f64x2.trunc (local.get $vec)))
 (func (export "f64x2.nearest") (param $vec v128) (result v128) (f64x2.nearest (local.get $vec)))

 ;; --- Integer <-> float conversions ---
 (func (export "i32x4.trunc_sat_f32x4_s") (param $vec v128) (result v128) (i32x4.trunc_sat_f32x4_s (local.get $vec)))
 (func (export "i32x4.trunc_sat_f32x4_u") (param $vec v128) (result v128) (i32x4.trunc_sat_f32x4_u (local.get $vec)))
 (func (export "i64x2.trunc_sat_f64x2_s") (param $vec v128) (result v128) (i64x2.trunc_sat_f64x2_s (local.get $vec)))
 (func (export "i64x2.trunc_sat_f64x2_u") (param $vec v128) (result v128) (i64x2.trunc_sat_f64x2_u (local.get $vec)))
 (func (export "f32x4.convert_i32x4_s") (param $vec v128) (result v128) (f32x4.convert_i32x4_s (local.get $vec)))
 (func (export "f32x4.convert_i32x4_u") (param $vec v128) (result v128) (f32x4.convert_i32x4_u (local.get $vec)))
 (func (export "f64x2.convert_i64x2_s") (param $vec v128) (result v128) (f64x2.convert_i64x2_s (local.get $vec)))
 (func (export "f64x2.convert_i64x2_u") (param $vec v128) (result v128) (f64x2.convert_i64x2_u (local.get $vec)))

 ;; --- Load-and-splat ---
 (func (export "v8x16.load_splat") (param $addr i32) (result v128) (v8x16.load_splat (local.get $addr)))
 (func (export "v16x8.load_splat") (param $addr i32) (result v128) (v16x8.load_splat (local.get $addr)))
 (func (export "v32x4.load_splat") (param $addr i32) (result v128) (v32x4.load_splat (local.get $addr)))
 (func (export "v64x2.load_splat") (param $addr i32) (result v128) (v64x2.load_splat (local.get $addr)))

 ;; --- Narrowing and widening between integer lane widths ---
 (func (export "i8x16.narrow_i16x8_s") (param $lhs v128) (param $rhs v128) (result v128) (i8x16.narrow_i16x8_s (local.get $lhs) (local.get $rhs)))
 (func (export "i8x16.narrow_i16x8_u") (param $lhs v128) (param $rhs v128) (result v128) (i8x16.narrow_i16x8_u (local.get $lhs) (local.get $rhs)))
 (func (export "i16x8.narrow_i32x4_s") (param $lhs v128) (param $rhs v128) (result v128) (i16x8.narrow_i32x4_s (local.get $lhs) (local.get $rhs)))
 (func (export "i16x8.narrow_i32x4_u") (param $lhs v128) (param $rhs v128) (result v128) (i16x8.narrow_i32x4_u (local.get $lhs) (local.get $rhs)))
 (func (export "i16x8.widen_low_i8x16_s") (param $vec v128) (result v128) (i16x8.widen_low_i8x16_s (local.get $vec)))
 (func (export "i16x8.widen_high_i8x16_s") (param $vec v128) (result v128) (i16x8.widen_high_i8x16_s (local.get $vec)))
 (func (export "i16x8.widen_low_i8x16_u") (param $vec v128) (result v128) (i16x8.widen_low_i8x16_u (local.get $vec)))
 (func (export "i16x8.widen_high_i8x16_u") (param $vec v128) (result v128) (i16x8.widen_high_i8x16_u (local.get $vec)))
 (func (export "i32x4.widen_low_i16x8_s") (param $vec v128) (result v128) (i32x4.widen_low_i16x8_s (local.get $vec)))
 (func (export "i32x4.widen_high_i16x8_s") (param $vec v128) (result v128) (i32x4.widen_high_i16x8_s (local.get $vec)))
 (func (export "i32x4.widen_low_i16x8_u") (param $vec v128) (result v128) (i32x4.widen_low_i16x8_u (local.get $vec)))
 (func (export "i32x4.widen_high_i16x8_u") (param $vec v128) (result v128) (i32x4.widen_high_i16x8_u (local.get $vec)))

 ;; --- Extending loads, zero-filling loads and swizzle ---
 (func (export "i16x8.load8x8_u") (param $addr i32) (result v128) (i16x8.load8x8_u (local.get $addr)))
 (func (export "i16x8.load8x8_s") (param $addr i32) (result v128) (i16x8.load8x8_s (local.get $addr)))
 (func (export "i32x4.load16x4_u") (param $addr i32) (result v128) (i32x4.load16x4_u (local.get $addr)))
 (func (export "i32x4.load16x4_s") (param $addr i32) (result v128) (i32x4.load16x4_s (local.get $addr)))
 (func (export "i64x2.load32x2_u") (param $addr i32) (result v128) (i64x2.load32x2_u (local.get $addr)))
 (func (export "i64x2.load32x2_s") (param $addr i32) (result v128) (i64x2.load32x2_s (local.get $addr)))
 (func (export "v128.load32_zero") (param $addr i32) (result v128) (v128.load32_zero (local.get $addr)))
 (func (export "v128.load64_zero") (param $addr i32) (result v128) (v128.load64_zero (local.get $addr)))
 (func (export "v8x16.swizzle") (param $lhs v128) (param $rhs v128) (result v128) (v8x16.swizzle (local.get $lhs) (local.get $rhs)))
)

;; Basic v128 manipulation
;; A plain 16-byte load at address 128 returns the ASCII codes of "WASMSIMDGOESFAST".
(assert_return
  (invoke "v128.load" (i32.const 128))
  (v128.const i8x16 87 65 83 77 83 73 77 68 71 79 69 83 70 65 83 84)
)
;; The store wrapper reloads what it just wrote, so the argument comes back unchanged.
(assert_return
  (invoke "v128.store" (i32.const 16) (v128.const i32x4 1 2 3 4))
  (v128.const i32x4 1 2 3 4)
)
;; Load-and-splat: the leading 1, 2, 4 and 8 bytes at address 128 repeated to fill the vector.
(assert_return
  (invoke "v8x16.load_splat" (i32.const 128))
  (v128.const i8x16 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87)
)
(assert_return
  (invoke "v16x8.load_splat" (i32.const 128))
  (v128.const i8x16 87 65 87 65 87 65 87 65 87 65 87 65 87 65 87 65)
)
(assert_return
  (invoke "v32x4.load_splat" (i32.const 128))
  (v128.const i8x16 87 65 83 77 87 65 83 77 87 65 83 77 87 65 83 77)
)
(assert_return
  (invoke "v64x2.load_splat" (i32.const 128))
  (v128.const i8x16 87 65 83 77 83 73 77 68 87 65 83 77 83 73 77 68)
)
;; Integer constants are checked against the same bits written in a different
;; lane shape, which pins down the little-endian byte order within each lane.
(assert_return
  (invoke "v128.const.i8x16")
  (v128.const i32x4 0x04030201 0x08070605 0x0c0b0a09 0x100f0e0d)
)
(assert_return
  (invoke "v128.const.i16x8")
  (v128.const i8x16 01 00 02 00 03 00 04 00 05 00 06 00 07 00 08 00)
)
(assert_return
  (invoke "v128.const.i32x4")
  (v128.const i8x16 01 00 00 00 02 00 00 00 03 00 00 00 04 00 00 00)
)
(assert_return
  (invoke "v128.const.i64x2")
  (v128.const i8x16 01 00 00 00 00 00 00 00 02 00 00 00 00 00 00 00)
)
;; Float constants are compared in their own lane shape.
(assert_return
  (invoke "v128.const.f32x4")
  (v128.const f32x4 1 2 3 4)
)
(assert_return
  (invoke "v128.const.f64x2")
  (v128.const f64x2 1 2)
)
;; Even result bytes come from the first operand and odd ones from the second.
(assert_return
  (invoke "v128.shuffle_interleave_bytes"
    (v128.const i8x16 1 0 3 0 5 0 7 0 9 0 11 0 13 0 15 0)
    (v128.const i8x16 0 2 0 4 0 6 0 8 0 10 0 12 0 14 0 16)
  )
  (v128.const i8x16 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16)
)
;; The four 32-bit lanes come back in reverse order.
(assert_return
  (invoke "v128.shuffle_reverse_i32s" (v128.const i32x4 1 2 3 4))
  (v128.const i32x4 4 3 2 1)
)

;; i8x16 lane accesses
;; Splat copies the scalar into all sixteen byte lanes.
(assert_return
  (invoke "i8x16.splat" (i32.const 5))
  (v128.const i8x16 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5)
)
;; 257 = 0x101: only the low byte of the scalar is kept.
(assert_return
  (invoke "i8x16.splat" (i32.const 257))
  (v128.const i8x16 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1)
)
;; Byte 255 reads back as -1 with the signed extract and 255 with the unsigned one.
(assert_return
  (invoke "i8x16.extract_lane_s_first" (v128.const i8x16 255 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0))
  (i32.const -1)
)
(assert_return
  (invoke "i8x16.extract_lane_s_last" (v128.const i8x16 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 255))
  (i32.const -1)
)
(assert_return
  (invoke "i8x16.extract_lane_u_first" (v128.const i8x16 255 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0))
  (i32.const 255)
)
(assert_return
  (invoke "i8x16.extract_lane_u_last" (v128.const i8x16 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 255))
  (i32.const 255)
)
;; Replacing a lane of an all-zero vector changes only that lane.
(assert_return
  (invoke "i8x16.replace_lane_first" (v128.const i64x2 0 0) (i32.const 7))
  (v128.const i8x16 7 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0)
)
(assert_return
  (invoke "i8x16.replace_lane_last" (v128.const i64x2 0 0) (i32.const 7))
  (v128.const i8x16 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 7)
)

;; i16x8 lane accesses
;; Every eight-value vector literal in this group uses the i16x8 shape, so the
;; number of literals matches the declared lane count.
;; Splat copies the scalar into all eight 16-bit lanes.
(assert_return (invoke "i16x8.splat" (i32.const 5)) (v128.const i16x8 5 5 5 5 5 5 5 5))
;; 65537 = 0x10001: only the low 16 bits of the scalar are kept.
(assert_return (invoke "i16x8.splat" (i32.const 65537)) (v128.const i16x8 1 1 1 1 1 1 1 1))
;; Lane value 65535 reads back as -1 with the signed extract and 65535 with the unsigned one.
(assert_return (invoke "i16x8.extract_lane_s_first" (v128.const i16x8 65535 0 0 0 0 0 0 0)) (i32.const -1))
(assert_return (invoke "i16x8.extract_lane_s_last" (v128.const i16x8 0 0 0 0 0 0 0 65535)) (i32.const -1))
(assert_return (invoke "i16x8.extract_lane_u_first" (v128.const i16x8 65535 0 0 0 0 0 0 0)) (i32.const 65535))
(assert_return (invoke "i16x8.extract_lane_u_last" (v128.const i16x8 0 0 0 0 0 0 0 65535)) (i32.const 65535))
;; Replacing a lane of an all-zero vector changes only that lane.
(assert_return (invoke "i16x8.replace_lane_first" (v128.const i64x2 0 0) (i32.const 7)) (v128.const i16x8 7 0 0 0 0 0 0 0))
(assert_return (invoke "i16x8.replace_lane_last" (v128.const i64x2 0 0) (i32.const 7)) (v128.const i16x8 0 0 0 0 0 0 0 7))

;; i32x4 lane accesses
;; Splat copies the scalar into all four 32-bit lanes.
(assert_return
  (invoke "i32x4.splat" (i32.const -5))
  (v128.const i32x4 -5 -5 -5 -5)
)
;; Extracting lane 0 and lane 3 returns the value placed there.
(assert_return
  (invoke "i32x4.extract_lane_first" (v128.const i32x4 -5 0 0 0))
  (i32.const -5)
)
(assert_return
  (invoke "i32x4.extract_lane_last" (v128.const i32x4 0 0 0 -5))
  (i32.const -5)
)
;; Replacing a lane of an all-zero vector changes only that lane.
(assert_return
  (invoke "i32x4.replace_lane_first" (v128.const i64x2 0 0) (i32.const 53))
  (v128.const i32x4 53 0 0 0)
)
(assert_return
  (invoke "i32x4.replace_lane_last" (v128.const i64x2 0 0) (i32.const 53))
  (v128.const i32x4 0 0 0 53)
)

;; i64x2 lane accesses
;; Splat copies the scalar into both 64-bit lanes.
(assert_return
  (invoke "i64x2.splat" (i64.const -5))
  (v128.const i64x2 -5 -5)
)
;; Extracting lane 0 and lane 1 returns the value placed there.
(assert_return
  (invoke "i64x2.extract_lane_first" (v128.const i64x2 -5 0))
  (i64.const -5)
)
(assert_return
  (invoke "i64x2.extract_lane_last" (v128.const i64x2 0 -5))
  (i64.const -5)
)
;; Replacing a lane of an all-zero vector changes only that lane.
(assert_return
  (invoke "i64x2.replace_lane_first" (v128.const i64x2 0 0) (i64.const 53))
  (v128.const i64x2 53 0)
)
(assert_return
  (invoke "i64x2.replace_lane_last" (v128.const i64x2 0 0) (i64.const 53))
  (v128.const i64x2 0 53)
)

;; f32x4 lane accesses
;; Splat copies the scalar into all four f32 lanes.
(assert_return
  (invoke "f32x4.splat" (f32.const -5))
  (v128.const f32x4 -5 -5 -5 -5)
)
;; Extracting lane 0 and lane 3 returns the value placed there.
(assert_return
  (invoke "f32x4.extract_lane_first" (v128.const f32x4 -5 0 0 0))
  (f32.const -5)
)
(assert_return
  (invoke "f32x4.extract_lane_last" (v128.const f32x4 0 0 0 -5))
  (f32.const -5)
)
;; The input is all-zero bits (written as i64x2), so untouched lanes read as 0.
(assert_return
  (invoke "f32x4.replace_lane_first" (v128.const i64x2 0 0) (f32.const 53))
  (v128.const f32x4 53 0 0 0)
)
(assert_return
  (invoke "f32x4.replace_lane_last" (v128.const i64x2 0 0) (f32.const 53))
  (v128.const f32x4 0 0 0 53)
)

;; f64x2 lane accesses
;; Splat copies the scalar into both f64 lanes.
(assert_return
  (invoke "f64x2.splat" (f64.const -5))
  (v128.const f64x2 -5 -5)
)
;; Extracting lane 0 and lane 1 returns the value placed there.
(assert_return
  (invoke "f64x2.extract_lane_first" (v128.const f64x2 -5 0))
  (f64.const -5)
)
(assert_return
  (invoke "f64x2.extract_lane_last" (v128.const f64x2 0 -5))
  (f64.const -5)
)
;; Replacing a lane of a zero vector changes only that lane.
(assert_return
  (invoke "f64x2.replace_lane_first" (v128.const f64x2 0 0) (f64.const 53))
  (v128.const f64x2 53 0)
)
(assert_return
  (invoke "f64x2.replace_lane_last" (v128.const f64x2 0 0) (f64.const 53))
  (v128.const f64x2 0 53)
)

;; i8x16 comparisons
;; All comparisons below reuse the same two operand vectors. Every sixteen-value
;; literal uses the i8x16 shape so the number of literals matches the lane count.
;; The bytes 128, 129 and 255 are negative when read as signed, so the _s and _u
;; variants give different answers on the lanes that contain them.
;; A result lane is -1 (all bits set) where the comparison holds and 0 otherwise.
(assert_return
  (invoke "i8x16.eq"
    (v128.const i8x16 0 127 13 128 1   13  129 42  0 127 255 42  1   13  129 42)
    (v128.const i8x16 0 255 13 42  129 127 0   128 0 255 13  42  129 127 0   128)
  )
  (v128.const i8x16 -1 0 -1 0 0 0 0 0 -1 0 0 -1 0 0 0 0)
)
(assert_return
  (invoke "i8x16.ne"
    (v128.const i8x16 0 127 13 128 1   13  129 42  0 127 255 42  1   13  129 42)
    (v128.const i8x16 0 255 13 42  129 127 0   128 0 255 13  42  129 127 0   128)
  )
  (v128.const i8x16 0 -1 0 -1 -1 -1 -1 -1 0 -1 -1 0 -1 -1 -1 -1)
)
(assert_return
  (invoke "i8x16.lt_s"
    (v128.const i8x16 0 127 13 128 1   13  129 42  0 127 255 42  1   13  129 42)
    (v128.const i8x16 0 255 13 42  129 127 0   128 0 255 13  42  129 127 0   128)
  )
  (v128.const i8x16 0 0 0 -1 0 -1 -1 0 0 0 -1 0 0 -1 -1 0)
)
(assert_return
  (invoke "i8x16.lt_u"
    (v128.const i8x16 0 127 13 128 1   13  129 42  0 127 255 42  1   13  129 42)
    (v128.const i8x16 0 255 13 42  129 127 0   128 0 255 13  42  129 127 0   128)
  )
  (v128.const i8x16 0 -1 0 0 -1 -1 0 -1 0 -1 0 0 -1 -1 0 -1)
)
(assert_return
  (invoke "i8x16.gt_s"
    (v128.const i8x16 0 127 13 128 1   13  129 42  0 127 255 42  1   13  129 42)
    (v128.const i8x16 0 255 13 42  129 127 0   128 0 255 13  42  129 127 0   128)
  )
  (v128.const i8x16 0 -1 0 0 -1 0 0 -1 0 -1 0 0 -1 0 0 -1)
)
(assert_return
  (invoke "i8x16.gt_u"
    (v128.const i8x16 0 127 13 128 1   13  129 42  0 127 255 42  1   13  129 42)
    (v128.const i8x16 0 255 13 42  129 127 0   128 0 255 13  42  129 127 0   128)
  )
  (v128.const i8x16 0 0 0 -1 0 0 -1 0 0 0 -1 0 0 0 -1 0)
)
(assert_return
  (invoke "i8x16.le_s"
    (v128.const i8x16 0 127 13 128 1   13  129 42  0 127 255 42  1   13  129 42)
    (v128.const i8x16 0 255 13 42  129 127 0   128 0 255 13  42  129 127 0   128)
  )
  (v128.const i8x16 -1 0 -1 -1 0 -1 -1 0 -1 0 -1 -1 0 -1 -1 0)
)
(assert_return
  (invoke "i8x16.le_u"
    (v128.const i8x16 0 127 13 128 1   13  129 42  0 127 255 42  1   13  129 42)
    (v128.const i8x16 0 255 13 42  129 127 0   128 0 255 13  42  129 127 0   128)
  )
  (v128.const i8x16 -1 -1 -1 0 -1 -1 0 -1 -1 -1 0 -1 -1 -1 0 -1)
)
(assert_return
  (invoke "i8x16.ge_s"
    (v128.const i8x16 0 127 13 128 1   13  129 42  0 127 255 42  1   13  129 42)
    (v128.const i8x16 0 255 13 42  129 127 0   128 0 255 13  42  129 127 0   128)
  )
  (v128.const i8x16 -1 -1 -1 0 -1 0 0 -1 -1 -1 0 -1 -1 0 0 -1)
)
(assert_return
  (invoke "i8x16.ge_u"
    (v128.const i32x4 0 127 13 128 1   13  129 42  0 127 255 42  1   13  129 42)
    (v128.const i32x4 0 255 13 42  129 127 0   128 0 255 13  42  129 127 0   128)
  )
  (v128.const i32x4 -1 0 -1 -1 0 0 -1 0 -1 0 -1 -1 0 0 -1 0)
)

;; i16x8 comparisons
;; All ten comparisons share one pair of 8-lane operands. Values above 32767 make the
;; signed and unsigned variants disagree on some lanes. Expected lanes are -1 where
;; the comparison holds and 0 where it does not.
;; The literals use the i16x8 shape: each lists 8 lane values, which is not a valid
;; lane count for an i32x4 literal.
(assert_return
  (invoke "i16x8.eq"
    (v128.const i16x8 0 32767 13 32768 1     32769 42    40000)
    (v128.const i16x8 0 13    1  32767 32769 42    40000 32767)
  )
  (v128.const i16x8 -1 0 0 0 0 0 0 0)
)
(assert_return
  (invoke "i16x8.ne"
    (v128.const i16x8 0 32767 13 32768 1     32769 42    40000)
    (v128.const i16x8 0 13    1  32767 32769 42    40000 32767)
  )
  (v128.const i16x8 0 -1 -1 -1 -1 -1 -1 -1)
)
(assert_return
  (invoke "i16x8.lt_s"
    (v128.const i16x8 0 32767 13 32768 1     32769 42    40000)
    (v128.const i16x8 0 13    1  32767 32769 42    40000 32767)
  )
  (v128.const i16x8 0 0 0 -1 0 -1 0 -1)
)
(assert_return
  (invoke "i16x8.lt_u"
    (v128.const i16x8 0 32767 13 32768 1     32769 42    40000)
    (v128.const i16x8 0 13    1  32767 32769 42    40000 32767)
  )
  (v128.const i16x8 0 0 0 0 -1 0 -1 0)
)
(assert_return
  (invoke "i16x8.gt_s"
    (v128.const i16x8 0 32767 13 32768 1     32769 42    40000)
    (v128.const i16x8 0 13    1  32767 32769 42    40000 32767)
  )
  (v128.const i16x8 0 -1 -1 0 -1 0 -1 0)
)
(assert_return
  (invoke "i16x8.gt_u"
    (v128.const i16x8 0 32767 13 32768 1     32769 42    40000)
    (v128.const i16x8 0 13    1  32767 32769 42    40000 32767)
  )
  (v128.const i16x8 0 -1 -1 -1 0 -1 0 -1)
)
(assert_return
  (invoke "i16x8.le_s"
    (v128.const i16x8 0 32767 13 32768 1     32769 42    40000)
    (v128.const i16x8 0 13    1  32767 32769 42    40000 32767)
  )
  (v128.const i16x8 -1 0 0 -1 0 -1 0 -1)
)
(assert_return
  (invoke "i16x8.le_u"
    (v128.const i16x8 0 32767 13 32768 1     32769 42    40000)
    (v128.const i16x8 0 13    1  32767 32769 42    40000 32767)
  )
  (v128.const i16x8 -1 0 0 0 -1 0 -1 0)
)
(assert_return
  (invoke "i16x8.ge_s"
    (v128.const i16x8 0 32767 13 32768 1     32769 42    40000)
    (v128.const i16x8 0 13    1  32767 32769 42    40000 32767)
  )
  (v128.const i16x8 -1 -1 -1 0 -1 0 -1 0)
)
(assert_return
  (invoke "i16x8.ge_u"
    (v128.const i16x8 0 32767 13 32768 1     32769 42    40000)
    (v128.const i16x8 0 13    1  32767 32769 42    40000 32767)
  )
  (v128.const i16x8 -1 -1 -1 -1 0 -1 0 -1)
)

;; i32x4 comparisons
;; One operand pair for all ten comparisons. The negative lanes (-1, -7) are where the
;; signed and unsigned variants give different answers. True lanes are -1, false are 0.
(assert_return (invoke "i32x4.eq" (v128.const i32x4 0 -1 53 -7) (v128.const i32x4 0 53 -7 -1))
  (v128.const i32x4 -1 0 0 0))
(assert_return (invoke "i32x4.ne" (v128.const i32x4 0 -1 53 -7) (v128.const i32x4 0 53 -7 -1))
  (v128.const i32x4 0 -1 -1 -1))
(assert_return (invoke "i32x4.lt_s" (v128.const i32x4 0 -1 53 -7) (v128.const i32x4 0 53 -7 -1))
  (v128.const i32x4 0 -1 0 -1))
(assert_return (invoke "i32x4.lt_u" (v128.const i32x4 0 -1 53 -7) (v128.const i32x4 0 53 -7 -1))
  (v128.const i32x4 0 0 -1 -1))
(assert_return (invoke "i32x4.gt_s" (v128.const i32x4 0 -1 53 -7) (v128.const i32x4 0 53 -7 -1))
  (v128.const i32x4 0 0 -1 0))
(assert_return (invoke "i32x4.gt_u" (v128.const i32x4 0 -1 53 -7) (v128.const i32x4 0 53 -7 -1))
  (v128.const i32x4 0 -1 0 0))
(assert_return (invoke "i32x4.le_s" (v128.const i32x4 0 -1 53 -7) (v128.const i32x4 0 53 -7 -1))
  (v128.const i32x4 -1 -1 0 -1))
(assert_return (invoke "i32x4.le_u" (v128.const i32x4 0 -1 53 -7) (v128.const i32x4 0 53 -7 -1))
  (v128.const i32x4 -1 0 -1 -1))
(assert_return (invoke "i32x4.ge_s" (v128.const i32x4 0 -1 53 -7) (v128.const i32x4 0 53 -7 -1))
  (v128.const i32x4 -1 0 -1 0))
(assert_return (invoke "i32x4.ge_u" (v128.const i32x4 0 -1 53 -7) (v128.const i32x4 0 53 -7 -1))
  (v128.const i32x4 -1 -1 0 0))

;; f32x4 comparisons
;; Results are integer masks, so the expectations are written as i32x4 (-1 = true,
;; 0 = false).

;; Ordinary finite values.
(assert_return (invoke "f32x4.eq" (v128.const f32x4 0 -1 1 0) (v128.const f32x4 0 0 -1 1))
  (v128.const i32x4 -1 0 0 0))
(assert_return (invoke "f32x4.ne" (v128.const f32x4 0 -1 1 0) (v128.const f32x4 0 0 -1 1))
  (v128.const i32x4 0 -1 -1 -1))
(assert_return (invoke "f32x4.lt" (v128.const f32x4 0 -1 1 0) (v128.const f32x4 0 0 -1 1))
  (v128.const i32x4 0 -1 0 -1))
(assert_return (invoke "f32x4.gt" (v128.const f32x4 0 -1 1 0) (v128.const f32x4 0 0 -1 1))
  (v128.const i32x4 0 0 -1 0))
(assert_return (invoke "f32x4.le" (v128.const f32x4 0 -1 1 0) (v128.const f32x4 0 0 -1 1))
  (v128.const i32x4 -1 -1 0 -1))
(assert_return (invoke "f32x4.ge" (v128.const f32x4 0 -1 1 0) (v128.const f32x4 0 0 -1 1))
  (v128.const i32x4 -1 0 -1 0))

;; NaN in either or both operands, plus infinity against itself: any lane involving
;; NaN is expected to be false for everything except ne.
(assert_return (invoke "f32x4.eq" (v128.const f32x4 nan 0 nan infinity) (v128.const f32x4 0 nan nan infinity))
  (v128.const i32x4 0 0 0 -1))
(assert_return (invoke "f32x4.ne" (v128.const f32x4 nan 0 nan infinity) (v128.const f32x4 0 nan nan infinity))
  (v128.const i32x4 -1 -1 -1 0))
(assert_return (invoke "f32x4.lt" (v128.const f32x4 nan 0 nan infinity) (v128.const f32x4 0 nan nan infinity))
  (v128.const i32x4 0 0 0 0))
(assert_return (invoke "f32x4.gt" (v128.const f32x4 nan 0 nan infinity) (v128.const f32x4 0 nan nan infinity))
  (v128.const i32x4 0 0 0 0))
(assert_return (invoke "f32x4.le" (v128.const f32x4 nan 0 nan infinity) (v128.const f32x4 0 nan nan infinity))
  (v128.const i32x4 0 0 0 -1))
(assert_return (invoke "f32x4.ge" (v128.const f32x4 nan 0 nan infinity) (v128.const f32x4 0 nan nan infinity))
  (v128.const i32x4 0 0 0 -1))

;; Infinities against finite values and against NaN.
(assert_return (invoke "f32x4.eq" (v128.const f32x4 -infinity 0 nan -infinity) (v128.const f32x4 0 infinity infinity nan))
  (v128.const i32x4 0 0 0 0))
(assert_return (invoke "f32x4.ne" (v128.const f32x4 -infinity 0 nan -infinity) (v128.const f32x4 0 infinity infinity nan))
  (v128.const i32x4 -1 -1 -1 -1))
(assert_return (invoke "f32x4.lt" (v128.const f32x4 -infinity 0 nan -infinity) (v128.const f32x4 0 infinity infinity nan))
  (v128.const i32x4 -1 -1 0 0))
(assert_return (invoke "f32x4.gt" (v128.const f32x4 -infinity 0 nan -infinity) (v128.const f32x4 0 infinity infinity nan))
  (v128.const i32x4 0 0 0 0))
(assert_return (invoke "f32x4.le" (v128.const f32x4 -infinity 0 nan -infinity) (v128.const f32x4 0 infinity infinity nan))
  (v128.const i32x4 -1 -1 0 0))
(assert_return (invoke "f32x4.ge" (v128.const f32x4 -infinity 0 nan -infinity) (v128.const f32x4 0 infinity infinity nan))
  (v128.const i32x4 0 0 0 0))

;; f64x2 comparisons
;; Results are 64-bit lane masks, so the expectations are written as i64x2
;; (-1 = true, 0 = false).

;; Finite values: an equal lane and a greater-than lane.
(assert_return (invoke "f64x2.eq" (v128.const f64x2 0 1) (v128.const f64x2 0 0))
  (v128.const i64x2 -1 0))
(assert_return (invoke "f64x2.ne" (v128.const f64x2 0 1) (v128.const f64x2 0 0))
  (v128.const i64x2 0 -1))
(assert_return (invoke "f64x2.lt" (v128.const f64x2 0 1) (v128.const f64x2 0 0))
  (v128.const i64x2 0 0))
(assert_return (invoke "f64x2.gt" (v128.const f64x2 0 1) (v128.const f64x2 0 0))
  (v128.const i64x2 0 -1))
(assert_return (invoke "f64x2.le" (v128.const f64x2 0 1) (v128.const f64x2 0 0))
  (v128.const i64x2 -1 0))
(assert_return (invoke "f64x2.ge" (v128.const f64x2 0 1) (v128.const f64x2 0 0))
  (v128.const i64x2 -1 -1))

;; NaN and a finite value, each compared with infinity.
(assert_return (invoke "f64x2.eq" (v128.const f64x2 nan 0) (v128.const f64x2 infinity infinity))
  (v128.const i64x2 0 0))
(assert_return (invoke "f64x2.ne" (v128.const f64x2 nan 0) (v128.const f64x2 infinity infinity))
  (v128.const i64x2 -1 -1))
(assert_return (invoke "f64x2.lt" (v128.const f64x2 nan 0) (v128.const f64x2 infinity infinity))
  (v128.const i64x2 0 -1))
(assert_return (invoke "f64x2.gt" (v128.const f64x2 nan 0) (v128.const f64x2 infinity infinity))
  (v128.const i64x2 0 0))
(assert_return (invoke "f64x2.le" (v128.const f64x2 nan 0) (v128.const f64x2 infinity infinity))
  (v128.const i64x2 0 -1))
(assert_return (invoke "f64x2.ge" (v128.const f64x2 nan 0) (v128.const f64x2 infinity infinity))
  (v128.const i64x2 0 0))

;; bitwise operations
;; The binary ops use the operand pair (0 0 -1 -1) / (0 -1 0 -1), which covers all
;; four combinations of all-zero and all-one lanes, i.e. a full truth table.
(assert_return (invoke "v128.not" (v128.const i32x4 0 -1 0 -1))
  (v128.const i32x4 -1 0 -1 0))
(assert_return (invoke "v128.and" (v128.const i32x4 0 0 -1 -1) (v128.const i32x4 0 -1 0 -1))
  (v128.const i32x4 0 0 0 -1))
(assert_return (invoke "v128.or" (v128.const i32x4 0 0 -1 -1) (v128.const i32x4 0 -1 0 -1))
  (v128.const i32x4 0 -1 -1 -1))
(assert_return (invoke "v128.xor" (v128.const i32x4 0 0 -1 -1) (v128.const i32x4 0 -1 0 -1))
  (v128.const i32x4 0 -1 -1 0))
;; andnot: only the lane where the first operand is set and the second is clear survives.
(assert_return (invoke "v128.andnot" (v128.const i32x4 0 0 -1 -1) (v128.const i32x4 0 -1 0 -1))
  (v128.const i32x4 0 0 -1 0))
;; bitselect: the third operand is the mask. The expected value takes bits of the
;; first operand (0xAA...) where the mask bit is 1 and of the second (0xBB...) where it is 0.
(assert_return
  (invoke "v128.bitselect"
    (v128.const i32x4 0xAAAAAAAA 0xAAAAAAAA 0xAAAAAAAA 0xAAAAAAAA)
    (v128.const i32x4 0xBBBBBBBB 0xBBBBBBBB 0xBBBBBBBB 0xBBBBBBBB)
    (v128.const i32x4 0xF0F0F0F0 0xFFFFFFFF 0x00000000 0xFF00FF00))
  (v128.const i32x4 0xABABABAB 0xAAAAAAAA 0xBBBBBBBB 0xAABBAABB))

;; i8x16 arithmetic
;; Every 16-lane literal in this group uses the i8x16 shape; an i32x4 literal may only
;; carry 4 lane values. Lane values are written either signed (-128..127) or unsigned
;; (0..255); both spellings denote the same byte.

;; abs / neg: -128 has no positive counterpart in 8 bits and is expected to stay -128.
(assert_return (invoke "i8x16.abs" (v128.const i8x16 0 1 42 -3 -56 127 -128 -126 0 -1 -42 3 56 -127 -128 126))
  (v128.const i8x16 0 1 42 3 56 127 -128 126 0 1 42 3 56 127 -128 126)
)
(assert_return (invoke "i8x16.neg" (v128.const i8x16 0 1 42 -3 -56 127 -128 -126 0 -1 -42 3 56 -127 -128 126))
  (v128.const i8x16 0 -1 -42 3 56 -127 -128 126 0 1 42 -3 -56 127 -128 -126)
)

;; any_true / all_true over: all zero, a single non-zero lane, a single zero lane, all non-zero.
(assert_return (invoke "i8x16.any_true" (v128.const i8x16 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0)) (i32.const 0))
(assert_return (invoke "i8x16.any_true" (v128.const i8x16 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0)) (i32.const 1))
(assert_return (invoke "i8x16.any_true" (v128.const i8x16 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1)) (i32.const 1))
(assert_return (invoke "i8x16.any_true" (v128.const i8x16 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1)) (i32.const 1))
(assert_return (invoke "i8x16.all_true" (v128.const i8x16 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0)) (i32.const 0))
(assert_return (invoke "i8x16.all_true" (v128.const i8x16 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0)) (i32.const 0))
(assert_return (invoke "i8x16.all_true" (v128.const i8x16 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1)) (i32.const 0))
(assert_return (invoke "i8x16.all_true" (v128.const i8x16 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1)) (i32.const 1))

;; bitmask: lanes 0, 3, 5 and 7 of each half have their top bit set -> 0xA9 per half, 0xA9A9 = 43433.
(assert_return (invoke "i8x16.bitmask" (v128.const i8x16 -1 0 1 -128 127 -127 0 128 -1 0 1 -128 127 -127 0 128)) (i32.const 43433))

;; Shifts by 1, and by 8 (the lane width), which is expected to leave the input unchanged.
(assert_return (invoke "i8x16.shl" (v128.const i8x16 0 1 2 4 8 16 32 64 -128 3 6 12 24 48 96 -64) (i32.const 1))
  (v128.const i8x16 0 2 4 8 16 32 64 -128 0 6 12 24 48 96 -64 -128)
)
(assert_return (invoke "i8x16.shl" (v128.const i8x16 0 1 2 4 8 16 32 64 -128 3 6 12 24 48 96 -64) (i32.const 8))
  (v128.const i8x16 0 1 2 4 8 16 32 64 -128 3 6 12 24 48 96 -64)
)
(assert_return (invoke "i8x16.shr_u" (v128.const i8x16 0 1 2 4 8 16 32 64 -128 3 6 12 24 48 96 -64) (i32.const 1))
  (v128.const i8x16 0 0 1 2 4 8 16 32 64 1 3 6 12 24 48 96)
)
(assert_return (invoke "i8x16.shr_u" (v128.const i8x16 0 1 2 4 8 16 32 64 -128 3 6 12 24 48 96 -64) (i32.const 8))
  (v128.const i8x16 0 1 2 4 8 16 32 64 -128 3 6 12 24 48 96 -64)
)
(assert_return (invoke "i8x16.shr_s" (v128.const i8x16 0 1 2 4 8 16 32 64 -128 3 6 12 24 48 96 -64) (i32.const 1))
  (v128.const i8x16 0 0 1 2 4 8 16 32 -64 1 3 6 12 24 48 -32)
)
(assert_return (invoke "i8x16.shr_s" (v128.const i8x16 0 1 2 4 8 16 32 64 -128 3 6 12 24 48 96 -64) (i32.const 8))
  (v128.const i8x16 0 1 2 4 8 16 32 64 -128 3 6 12 24 48 96 -64)
)

;; Binary arithmetic: every operation below runs on the same operand pair, chosen so
;; that wrapping, signed saturation and unsigned saturation give different results.
(assert_return
  (invoke "i8x16.add"
    (v128.const i8x16 0  42 255 128 127 129   6 29 103 196 231 142 17 250   1  73)
    (v128.const i8x16 3 231   1 128 129 6   103 17  42  29  73  42  0 255 127 142)
  )
  (v128.const i8x16 3 17 0 0 0 135 109 46 145 225 48 184 17 249 128 215)
)
(assert_return
  (invoke "i8x16.add_saturate_s"
    (v128.const i8x16 0  42 255 128 127 129   6 29 103 196 231 142 17 250   1  73)
    (v128.const i8x16 3 231   1 128 129 6   103 17  42  29  73  42  0 255 127 142)
  )
  (v128.const i8x16 3 17 0 128 0 135 109 46 127 225 48 184 17 249 127 215)
)
(assert_return
  (invoke "i8x16.add_saturate_u"
    (v128.const i8x16 0  42 255 128 127 129   6 29 103 196 231 142 17 250   1  73)
    (v128.const i8x16 3 231   1 128 129 6   103 17  42  29  73  42  0 255 127 142)
  )
  (v128.const i8x16 3 255 255 255 255 135 109 46 145 225 255 184 17 255 128 215)
)
(assert_return
  (invoke "i8x16.sub"
    (v128.const i8x16 0  42 255 128 127 129   6 29 103 196 231 142 17 250   1  73)
    (v128.const i8x16 3 231   1 128 129 6   103 17  42  29  73  42  0 255 127 142)
  )
  (v128.const i8x16 253 67 254 0 254 123 159 12 61 167 158 100 17 251 130 187)
)
(assert_return
  (invoke "i8x16.sub_saturate_s"
    (v128.const i8x16 0  42 255 128 127 129   6 29 103 196 231 142 17 250   1  73)
    (v128.const i8x16 3 231   1 128 129 6   103 17  42  29  73  42  0 255 127 142)
  )
  (v128.const i8x16 253 67 254 0 127 128 159 12 61 167 158 128 17 251 130 127)
)
(assert_return
  (invoke "i8x16.sub_saturate_u"
    (v128.const i8x16 0  42 255 128 127 129   6 29 103 196 231 142 17 250   1  73)
    (v128.const i8x16 3 231   1 128 129 6   103 17  42  29  73  42  0 255 127 142)
  )
  (v128.const i8x16 0 0 254 0 0 123 0 12 61 167 158 100 17 0 0 0)
)
(assert_return
  (invoke "i8x16.mul"
    (v128.const i8x16 0  42 255 128 127 129   6 29 103 196 231 142 17 250   1  73)
    (v128.const i8x16 3 231   1 128 129 6   103 17  42  29  73  42  0 255 127 142)
  )
  (v128.const i8x16 0 230 255 0 255 6 106 237 230 52 223 76 0 6 127 126)
)
(assert_return
  (invoke "i8x16.min_s"
    (v128.const i8x16 0  42 255 128 127 129   6 29 103 196 231 142 17 250   1  73)
    (v128.const i8x16 3 231   1 128 129 6   103 17  42  29  73  42  0 255 127 142)
  )
  (v128.const i8x16 0 231 255 128 129 129 6 17 42 196 231 142 0 250 1 142)
)
(assert_return
  (invoke "i8x16.min_u"
    (v128.const i8x16 0  42 255 128 127 129   6 29 103 196 231 142 17 250   1  73)
    (v128.const i8x16 3 231   1 128 129 6   103 17  42  29  73  42  0 255 127 142)
  )
  (v128.const i8x16 0 42 1 128 127 6 6 17 42 29 73 42 0 250 1 73)
)
(assert_return
  (invoke "i8x16.max_s"
    (v128.const i8x16 0  42 255 128 127 129   6 29 103 196 231 142 17 250   1  73)
    (v128.const i8x16 3 231   1 128 129 6   103 17  42  29  73  42  0 255 127 142)
  )
  (v128.const i8x16 3 42 1 128 127 6 103 29 103 29 73 42 17 255 127 73)
)
(assert_return
  (invoke "i8x16.max_u"
    (v128.const i8x16 0  42 255 128 127 129   6 29 103 196 231 142 17 250   1  73)
    (v128.const i8x16 3 231   1 128 129 6   103 17  42  29  73  42  0 255 127 142)
  )
  (v128.const i8x16 3 231 255 128 129 129 103 29 103 196 231 142 17 255 127 142)
)
;; avgr_u: the expected lanes equal (a + b + 1) / 2 on the unsigned values.
(assert_return
  (invoke "i8x16.avgr_u"
    (v128.const i8x16 0  42 255 128 127 129   6 29 103 196 231 142 17 250   1  73)
    (v128.const i8x16 3 231   1 128 129 6   103 17  42  29  73  42  0 255 127 142)
  )
  (v128.const i8x16 2 137 128 128 128 68 55 23 73 113 152 92 9 253 64 108)
)

;; i16x8 arithmetic
;; Every 8-lane literal in this group uses the i16x8 shape; an i32x4 literal may only
;; carry 4 lane values. Lane values are written either signed (-32768..32767) or
;; unsigned (0..65535); both spellings denote the same 16-bit pattern.

;; abs / neg: -32768 has no positive counterpart in 16 bits and is expected to stay -32768.
(assert_return (invoke "i16x8.abs" (v128.const i16x8 0 1 42 -3 -56 32767 -32768 32766))
  (v128.const i16x8 0 1 42 3 56 32767 -32768 32766)
)
(assert_return (invoke "i16x8.neg" (v128.const i16x8 0 1 42 -3 -56 32767 -32768 32766))
  (v128.const i16x8 0 -1 -42 3 56 -32767 -32768 -32766)
)

;; any_true / all_true over: all zero, a single non-zero lane, a single zero lane, all non-zero.
(assert_return (invoke "i16x8.any_true" (v128.const i16x8 0 0 0 0 0 0 0 0)) (i32.const 0))
(assert_return (invoke "i16x8.any_true" (v128.const i16x8 0 0 1 0 0 0 0 0)) (i32.const 1))
(assert_return (invoke "i16x8.any_true" (v128.const i16x8 1 1 1 1 1 0 1 1)) (i32.const 1))
(assert_return (invoke "i16x8.any_true" (v128.const i16x8 1 1 1 1 1 1 1 1)) (i32.const 1))
(assert_return (invoke "i16x8.all_true" (v128.const i16x8 0 0 0 0 0 0 0 0)) (i32.const 0))
(assert_return (invoke "i16x8.all_true" (v128.const i16x8 0 0 1 0 0 0 0 0)) (i32.const 0))
(assert_return (invoke "i16x8.all_true" (v128.const i16x8 1 1 1 1 1 0 1 1)) (i32.const 0))
(assert_return (invoke "i16x8.all_true" (v128.const i16x8 1 1 1 1 1 1 1 1)) (i32.const 1))

;; bitmask: lanes 0, 3, 5 and 7 have their top bit set -> 0b10101001 = 169.
(assert_return (invoke "i16x8.bitmask" (v128.const i16x8 -1 0 1 -32768 32767 -32767 0 32768)) (i32.const 169))

;; Shifts by 1, and by 16 (the lane width), which is expected to leave the input unchanged.
(assert_return (invoke "i16x8.shl" (v128.const i16x8 0 8 16 128 256 2048 4096 -32768) (i32.const 1)) (v128.const i16x8 0 16 32 256 512 4096 8192 0))
(assert_return (invoke "i16x8.shl" (v128.const i16x8 0 8 16 128 256 2048 4096 -32768) (i32.const 16)) (v128.const i16x8 0 8 16 128 256 2048 4096 -32768))
(assert_return (invoke "i16x8.shr_u" (v128.const i16x8 0 8 16 128 256 2048 4096 -32768) (i32.const 1)) (v128.const i16x8 0 4 8 64 128 1024 2048 16384))
(assert_return (invoke "i16x8.shr_u" (v128.const i16x8 0 8 16 128 256 2048 4096 -32768) (i32.const 16)) (v128.const i16x8 0 8 16 128 256 2048 4096 -32768))
(assert_return (invoke "i16x8.shr_s" (v128.const i16x8 0 8 16 128 256 2048 4096 -32768) (i32.const 1)) (v128.const i16x8 0 4 8 64 128 1024 2048 -16384))
(assert_return (invoke "i16x8.shr_s" (v128.const i16x8 0 8 16 128 256 2048 4096 -32768) (i32.const 16)) (v128.const i16x8 0 8 16 128 256 2048 4096 -32768))

;; Binary arithmetic: every operation below runs on the same operand pair, chosen so
;; that wrapping, signed saturation and unsigned saturation give different results.
(assert_return
  (invoke "i16x8.add"
    (v128.const i16x8 0   65280 32768 32512 33024 59136 64000 32766)
    (v128.const i16x8 768     1 32768 33024  1536 18688 65280     2)
  )
  (v128.const i16x8 768 65281 0 0 34560 12288 63744 32768)
)
(assert_return
  (invoke "i16x8.add_saturate_s"
    (v128.const i16x8 0   65280 32768 32512 33024 59136 64000 32766)
    (v128.const i16x8 768     1 32768 33024  1536 18688 65280     2)
  )
  (v128.const i16x8 768 65281 32768 0 34560 12288 63744 32767)
)
(assert_return
  (invoke "i16x8.add_saturate_u"
    (v128.const i16x8 0   65280 32768 32512 33024 59136 64000 32766)
    (v128.const i16x8 768     1 32768 33024  1536 18688 65280     2)
  )
  (v128.const i16x8 768 65281 65535 65535 34560 65535 65535 32768)
)
(assert_return
  (invoke "i16x8.sub"
    (v128.const i16x8 0   65280 32768 32512 33024 59136 64000 32766)
    (v128.const i16x8 768     1 32768 33024  1536 18688 65280     2)
  )
  (v128.const i16x8 64768 65279 0 65024 31488 40448 64256 32764)
)
(assert_return
  (invoke "i16x8.sub_saturate_s"
    (v128.const i16x8 0   65280 32768 32512 33024 59136 64000 32766)
    (v128.const i16x8 768     1 32768 33024  1536 18688 65280     2)
  )
  (v128.const i16x8 64768 65279 0 32767 32768 40448 64256 32764)
)
(assert_return
  (invoke "i16x8.sub_saturate_u"
    (v128.const i16x8 0   65280 32768 32512 33024 59136 64000 32766)
    (v128.const i16x8 768     1 32768 33024  1536 18688 65280     2)
  )
  (v128.const i16x8 0 65279 0 0 31488 40448 0 32764)
)
(assert_return
  (invoke "i16x8.mul"
    (v128.const i16x8 0   65280 32768 32512 33024 59136 64000 32766)
    (v128.const i16x8 768     1 32768 33024  1536 18688 65280     2)
  )
  (v128.const i16x8 0 65280 0 0 0 0 0 65532)
)
(assert_return
  (invoke "i16x8.min_s"
    (v128.const i16x8 0   65280 32768 32512 33024 59136 64000 32766)
    (v128.const i16x8 768     1 32768 33024  1536 18688 65280     2)
  )
  (v128.const i16x8 0 65280 32768 33024 33024 59136 64000 2)
)
(assert_return
  (invoke "i16x8.min_u"
    (v128.const i16x8 0   65280 32768 32512 33024 59136 64000 32766)
    (v128.const i16x8 768     1 32768 33024  1536 18688 65280     2)
  )
  (v128.const i16x8 0 1 32768 32512 1536 18688 64000 2)
)
(assert_return
  (invoke "i16x8.max_s"
    (v128.const i16x8 0   65280 32768 32512 33024 59136 64000 32766)
    (v128.const i16x8 768     1 32768 33024  1536 18688 65280     2)
  )
  (v128.const i16x8 768 1 32768 32512 1536 18688 65280 32766)
)
(assert_return
  (invoke "i16x8.max_u"
    (v128.const i16x8 0   65280 32768 32512 33024 59136 64000 32766)
    (v128.const i16x8 768     1 32768 33024  1536 18688 65280     2)
  )
  (v128.const i16x8 768 65280 32768 33024 33024 59136 65280 32766)
)
;; avgr_u: the expected lanes equal (a + b + 1) / 2 on the unsigned values.
(assert_return
  (invoke "i16x8.avgr_u"
    (v128.const i16x8 0   65280 32768 32512 33024 59136 64000 32766)
    (v128.const i16x8 768     1 32768 33024  1536 18688 65280     2)
  )
  (v128.const i16x8 384 32641 32768 32768 17280 38912 64640 16384)
)

;; i32x4 arithmetic

;; abs / neg: 0x80000000 (the minimum i32) is expected to map to itself.
(assert_return (invoke "i32x4.abs" (v128.const i32x4 0 1 0x80000000 0x80000001)) (v128.const i32x4 0 1 0x80000000 0x7fffffff))
(assert_return (invoke "i32x4.neg" (v128.const i32x4 0 1 0x80000000 0x80000001)) (v128.const i32x4 0 -1 0x80000000 0x7fffffff))

;; any_true / all_true over: all zero, a single non-zero lane, a single zero lane, all non-zero.
(assert_return (invoke "i32x4.any_true" (v128.const i32x4 0 0 0 0)) (i32.const 0))
(assert_return (invoke "i32x4.any_true" (v128.const i32x4 0 0 1 0)) (i32.const 1))
(assert_return (invoke "i32x4.any_true" (v128.const i32x4 1 0 1 1)) (i32.const 1))
(assert_return (invoke "i32x4.any_true" (v128.const i32x4 1 1 1 1)) (i32.const 1))
(assert_return (invoke "i32x4.all_true" (v128.const i32x4 0 0 0 0)) (i32.const 0))
(assert_return (invoke "i32x4.all_true" (v128.const i32x4 0 0 1 0)) (i32.const 0))
(assert_return (invoke "i32x4.all_true" (v128.const i32x4 1 0 1 1)) (i32.const 0))
(assert_return (invoke "i32x4.all_true" (v128.const i32x4 1 1 1 1)) (i32.const 1))

;; bitmask: lanes 0 and 2 are negative -> 0b0101 = 5.
(assert_return (invoke "i32x4.bitmask" (v128.const i32x4 -1 0 -128 127)) (i32.const 5))

;; Shifts by 1, and by 32 (the lane width), which is expected to leave the input unchanged.
(assert_return (invoke "i32x4.shl" (v128.const i32x4 1 0x40000000 0x80000000 -1) (i32.const 1)) (v128.const i32x4 2 0x80000000 0 -2))
(assert_return (invoke "i32x4.shl" (v128.const i32x4 1 0x40000000 0x80000000 -1) (i32.const 32)) (v128.const i32x4 1 0x40000000 0x80000000 -1))
(assert_return (invoke "i32x4.shr_s" (v128.const i32x4 1 0x40000000 0x80000000 -1) (i32.const 1)) (v128.const i32x4 0 0x20000000 0xc0000000 -1))
(assert_return (invoke "i32x4.shr_s" (v128.const i32x4 1 0x40000000 0x80000000 -1) (i32.const 32)) (v128.const i32x4 1 0x40000000 0x80000000 -1))
(assert_return (invoke "i32x4.shr_u" (v128.const i32x4 1 0x40000000 0x80000000 -1) (i32.const 1)) (v128.const i32x4 0 0x20000000 0x40000000 0x7fffffff))
(assert_return (invoke "i32x4.shr_u" (v128.const i32x4 1 0x40000000 0x80000000 -1) (i32.const 32)) (v128.const i32x4 1 0x40000000 0x80000000 -1))

;; Wrapping add / sub / mul; the 0x80000001 lane overflows 32 bits in each case.
(assert_return (invoke "i32x4.add" (v128.const i32x4 0 0x80000001 42 5) (v128.const i32x4 0 0x80000001 5 42)) (v128.const i32x4 0 2 47 47))
(assert_return (invoke "i32x4.sub" (v128.const i32x4 0 2 47 47) (v128.const i32x4 0 0x80000001 42 5)) (v128.const i32x4 0 0x80000001 5 42))
(assert_return (invoke "i32x4.mul" (v128.const i32x4 0 0x80000001 42 5) (v128.const i32x4 0 0x80000001 42 5)) (v128.const i32x4 0 1 1764 25))

;; min / max: lanes with the top bit set order differently under signed and unsigned comparison.
(assert_return
  (invoke "i32x4.min_s" (v128.const i32x4 0 0x80000001 42 0xc0000000) (v128.const i32x4 0xffffffff 42 0 0xb0000000))
  (v128.const i32x4 0xffffffff 0x80000001 0 0xb0000000)
)
(assert_return
  (invoke "i32x4.min_u" (v128.const i32x4 0 0x80000001 42 0xc0000000) (v128.const i32x4 0xffffffff 42 0 0xb0000000))
  (v128.const i32x4 0 42 0 0xb0000000)
)
(assert_return
  (invoke "i32x4.max_s" (v128.const i32x4 0 0x80000001 42 0xc0000000) (v128.const i32x4 0xffffffff 42 0 0xb0000000))
  (v128.const i32x4 0 42 42 0xc0000000)
)
(assert_return
  (invoke "i32x4.max_u" (v128.const i32x4 0 0x80000001 42 0xc0000000) (v128.const i32x4 0xffffffff 42 0 0xb0000000))
  (v128.const i32x4 0xffffffff 0x80000001 42 0xc0000000)
)

;; dot_i16x8_s: the operands are 8-lane i16 vectors (hence the i16x8 shape) and the
;; result is 4 i32 lanes, each the sum of the products of one adjacent lane pair,
;; e.g. lane 3 = 6*-7 + 7*-8 = -98.
(assert_return
  (invoke "i32x4.dot_i16x8_s" (v128.const i16x8 0 1 2 3 4 5 6 7) (v128.const i16x8 -1 2 -3 4 5 6 -7 -8))
  (v128.const i32x4 2 6 50 -98)
)

;; i64x2 arithmetic

;; neg: 0x8000000000000000 (the minimum i64) is expected to map to itself.
(assert_return (invoke "i64x2.neg" (v128.const i64x2 0x8000000000000000 42))
  (v128.const i64x2 0x8000000000000000 -42))

;; any_true / all_true over: all zero, one non-zero lane, all non-zero.
(assert_return (invoke "i64x2.any_true" (v128.const i64x2 0 0))
  (i32.const 0))
(assert_return (invoke "i64x2.any_true" (v128.const i64x2 1 0))
  (i32.const 1))
(assert_return (invoke "i64x2.any_true" (v128.const i64x2 1 1))
  (i32.const 1))
(assert_return (invoke "i64x2.all_true" (v128.const i64x2 0 0))
  (i32.const 0))
(assert_return (invoke "i64x2.all_true" (v128.const i64x2 1 0))
  (i32.const 0))
(assert_return (invoke "i64x2.all_true" (v128.const i64x2 1 1))
  (i32.const 1))

;; Shifts by 1, and by 64 (the lane width), which is expected to leave the input unchanged.
(assert_return (invoke "i64x2.shl" (v128.const i64x2 1 0x8000000000000000) (i32.const 1))
  (v128.const i64x2 2 0))
(assert_return (invoke "i64x2.shl" (v128.const i64x2 1 0x8000000000000000) (i32.const 64))
  (v128.const i64x2 1 0x8000000000000000))
(assert_return (invoke "i64x2.shr_s" (v128.const i64x2 1 0x8000000000000000) (i32.const 1))
  (v128.const i64x2 0 0xc000000000000000))
(assert_return (invoke "i64x2.shr_s" (v128.const i64x2 1 0x8000000000000000) (i32.const 64))
  (v128.const i64x2 1 0x8000000000000000))
(assert_return (invoke "i64x2.shr_u" (v128.const i64x2 1 0x8000000000000000) (i32.const 1))
  (v128.const i64x2 0 0x4000000000000000))
(assert_return (invoke "i64x2.shr_u" (v128.const i64x2 1 0x8000000000000000) (i32.const 64))
  (v128.const i64x2 1 0x8000000000000000))

;; Wrapping add / sub / mul; the 0x8000000000000001 lane overflows 64 bits.
(assert_return (invoke "i64x2.add" (v128.const i64x2 0x8000000000000001 42) (v128.const i64x2 0x8000000000000001 0))
  (v128.const i64x2 2 42))
(assert_return (invoke "i64x2.sub" (v128.const i64x2 2 42) (v128.const i64x2 0x8000000000000001 0))
  (v128.const i64x2 0x8000000000000001 42))
(assert_return (invoke "i64x2.mul" (v128.const i64x2 2 42) (v128.const i64x2 0x8000000000000001 0))
  (v128.const i64x2 2 0))

;; f32x4 arithmetic
;; The inputs concentrate on signed zero, NaN (with either sign) and infinities.

;; Unary sign / root operations.
(assert_return (invoke "f32x4.abs" (v128.const f32x4 -0 nan -infinity 5))
  (v128.const f32x4 0 nan infinity 5))
(assert_return (invoke "f32x4.neg" (v128.const f32x4 -0 nan -infinity 5))
  (v128.const f32x4 0 -nan infinity -5))
(assert_return (invoke "f32x4.sqrt" (v128.const f32x4 -0 nan infinity 4))
  (v128.const f32x4 -0 nan infinity 2))
;; TODO: qfma/qfms tests

;; add / sub / mul / div: the expectations keep the sign of the NaN in the first operand.
(assert_return (invoke "f32x4.add" (v128.const f32x4 nan -nan infinity 42) (v128.const f32x4 42 infinity infinity 1))
  (v128.const f32x4 nan -nan infinity 43))
(assert_return (invoke "f32x4.sub" (v128.const f32x4 nan -nan infinity 42) (v128.const f32x4 42 infinity -infinity 1))
  (v128.const f32x4 nan -nan infinity 41))
(assert_return (invoke "f32x4.mul" (v128.const f32x4 nan -nan infinity 42) (v128.const f32x4 42 infinity infinity 2))
  (v128.const f32x4 nan -nan infinity 84))
(assert_return (invoke "f32x4.div" (v128.const f32x4 nan -nan infinity 42) (v128.const f32x4 42 infinity 2 2))
  (v128.const f32x4 nan -nan infinity 21))

;; min / max: expected to order -0 below 0 and to return NaN if either lane is NaN.
(assert_return (invoke "f32x4.min" (v128.const f32x4 -0 0 nan 5) (v128.const f32x4 0 -0 5 nan))
  (v128.const f32x4 -0 -0 nan nan))
(assert_return (invoke "f32x4.max" (v128.const f32x4 -0 0 nan 5) (v128.const f32x4 0 -0 5 nan))
  (v128.const f32x4 0 0 nan nan))
;; pmin / pmax: for these inputs the expected result is the first operand in every lane.
(assert_return (invoke "f32x4.pmin" (v128.const f32x4 -0 0 nan 5) (v128.const f32x4 0 -0 5 nan))
  (v128.const f32x4 -0 0 nan 5))
(assert_return (invoke "f32x4.pmax" (v128.const f32x4 -0 0 nan 5) (v128.const f32x4 0 -0 5 nan))
  (v128.const f32x4 -0 0 nan 5))

;; Rounding: each mode is checked on (zeros, infinities), (NaN, integer, +-0.5) and
;; (+-1.5, +-4.2). Results that round to zero from below are expected to be -0.
(assert_return (invoke "f32x4.ceil" (v128.const f32x4 -0 0 infinity -infinity))
  (v128.const f32x4 -0 0 infinity -infinity))
(assert_return (invoke "f32x4.ceil" (v128.const f32x4 nan 42 0.5 -0.5))
  (v128.const f32x4 nan 42 1 -0))
(assert_return (invoke "f32x4.ceil" (v128.const f32x4 1.5 -1.5 4.2 -4.2))
  (v128.const f32x4 2 -1 5 -4))
(assert_return (invoke "f32x4.floor" (v128.const f32x4 -0 0 infinity -infinity))
  (v128.const f32x4 -0 0 infinity -infinity))
(assert_return (invoke "f32x4.floor" (v128.const f32x4 nan 42 0.5 -0.5))
  (v128.const f32x4 nan 42 0 -1))
(assert_return (invoke "f32x4.floor" (v128.const f32x4 1.5 -1.5 4.2 -4.2))
  (v128.const f32x4 1 -2 4 -5))
(assert_return (invoke "f32x4.trunc" (v128.const f32x4 -0 0 infinity -infinity))
  (v128.const f32x4 -0 0 infinity -infinity))
(assert_return (invoke "f32x4.trunc" (v128.const f32x4 nan 42 0.5 -0.5))
  (v128.const f32x4 nan 42 0 -0))
(assert_return (invoke "f32x4.trunc" (v128.const f32x4 1.5 -1.5 4.2 -4.2))
  (v128.const f32x4 1 -1 4 -4))
(assert_return (invoke "f32x4.nearest" (v128.const f32x4 -0 0 infinity -infinity))
  (v128.const f32x4 -0 0 infinity -infinity))
(assert_return (invoke "f32x4.nearest" (v128.const f32x4 nan 42 0.5 -0.5))
  (v128.const f32x4 nan 42 0 -0))
(assert_return (invoke "f32x4.nearest" (v128.const f32x4 1.5 -1.5 4.2 -4.2))
  (v128.const f32x4 2 -2 4 -4))

;; f64x2 arithmetic
;; Same edge cases as the f32x4 group, split over more assertions because a vector
;; holds only two f64 lanes.

;; Unary sign / root operations.
(assert_return (invoke "f64x2.abs" (v128.const f64x2 -0 nan))
  (v128.const f64x2 0 nan))
(assert_return (invoke "f64x2.abs" (v128.const f64x2 -infinity 5))
  (v128.const f64x2 infinity 5))
(assert_return (invoke "f64x2.neg" (v128.const f64x2 -0 nan))
  (v128.const f64x2 0 -nan))
(assert_return (invoke "f64x2.neg" (v128.const f64x2 -infinity 5))
  (v128.const f64x2 infinity -5))
(assert_return (invoke "f64x2.sqrt" (v128.const f64x2 -0 nan))
  (v128.const f64x2 -0 nan))
(assert_return (invoke "f64x2.sqrt" (v128.const f64x2 infinity 4))
  (v128.const f64x2 infinity 2))
;; TODO: qfma/qfms tests

;; add / sub / mul / div: the expectations keep the sign of the NaN in the first operand.
(assert_return (invoke "f64x2.add" (v128.const f64x2 nan -nan) (v128.const f64x2 42 infinity))
  (v128.const f64x2 nan -nan))
(assert_return (invoke "f64x2.add" (v128.const f64x2 infinity 42) (v128.const f64x2 infinity 1))
  (v128.const f64x2 infinity 43))
(assert_return (invoke "f64x2.sub" (v128.const f64x2 nan -nan) (v128.const f64x2 42 infinity))
  (v128.const f64x2 nan -nan))
(assert_return (invoke "f64x2.sub" (v128.const f64x2 infinity 42) (v128.const f64x2 -infinity 1))
  (v128.const f64x2 infinity 41))
(assert_return (invoke "f64x2.mul" (v128.const f64x2 nan -nan) (v128.const f64x2 42 infinity))
  (v128.const f64x2 nan -nan))
(assert_return (invoke "f64x2.mul" (v128.const f64x2 infinity 42) (v128.const f64x2 infinity 2))
  (v128.const f64x2 infinity 84))
(assert_return (invoke "f64x2.div" (v128.const f64x2 nan -nan) (v128.const f64x2 42 infinity))
  (v128.const f64x2 nan -nan))
(assert_return (invoke "f64x2.div" (v128.const f64x2 infinity 42) (v128.const f64x2 2 2))
  (v128.const f64x2 infinity 21))

;; min / max: expected to order -0 below 0 and to return NaN if either lane is NaN.
(assert_return (invoke "f64x2.min" (v128.const f64x2 -0 0) (v128.const f64x2 0 -0))
  (v128.const f64x2 -0 -0))
(assert_return (invoke "f64x2.min" (v128.const f64x2 nan 5) (v128.const f64x2 5 nan))
  (v128.const f64x2 nan nan))
(assert_return (invoke "f64x2.max" (v128.const f64x2 -0 0) (v128.const f64x2 0 -0))
  (v128.const f64x2 0 0))
(assert_return (invoke "f64x2.max" (v128.const f64x2 nan 5) (v128.const f64x2 5 nan))
  (v128.const f64x2 nan nan))
;; pmin / pmax: for these inputs the expected result is the first operand in every lane.
(assert_return (invoke "f64x2.pmin" (v128.const f64x2 -0 0) (v128.const f64x2 0 -0))
  (v128.const f64x2 -0 0))
(assert_return (invoke "f64x2.pmin" (v128.const f64x2 nan 5) (v128.const f64x2 5 nan))
  (v128.const f64x2 nan 5))
(assert_return (invoke "f64x2.pmax" (v128.const f64x2 -0 0) (v128.const f64x2 0 -0))
  (v128.const f64x2 -0 0))
(assert_return (invoke "f64x2.pmax" (v128.const f64x2 nan 5) (v128.const f64x2 5 nan))
  (v128.const f64x2 nan 5))

;; Rounding: each mode is checked on zeros, infinities, (NaN, integer), +-0.5, +-1.5
;; and +-4.2. Results that round to zero from below are expected to be -0.
(assert_return (invoke "f64x2.ceil" (v128.const f64x2 -0 0))
  (v128.const f64x2 -0 0))
(assert_return (invoke "f64x2.ceil" (v128.const f64x2 infinity -infinity))
  (v128.const f64x2 infinity -infinity))
(assert_return (invoke "f64x2.ceil" (v128.const f64x2 nan 42))
  (v128.const f64x2 nan 42))
(assert_return (invoke "f64x2.ceil" (v128.const f64x2 0.5 -0.5))
  (v128.const f64x2 1 -0))
(assert_return (invoke "f64x2.ceil" (v128.const f64x2 1.5 -1.5))
  (v128.const f64x2 2 -1))
(assert_return (invoke "f64x2.ceil" (v128.const f64x2 4.2 -4.2))
  (v128.const f64x2 5 -4))
(assert_return (invoke "f64x2.floor" (v128.const f64x2 -0 0))
  (v128.const f64x2 -0 0))
(assert_return (invoke "f64x2.floor" (v128.const f64x2 infinity -infinity))
  (v128.const f64x2 infinity -infinity))
(assert_return (invoke "f64x2.floor" (v128.const f64x2 nan 42))
  (v128.const f64x2 nan 42))
(assert_return (invoke "f64x2.floor" (v128.const f64x2 0.5 -0.5))
  (v128.const f64x2 0 -1))
(assert_return (invoke "f64x2.floor" (v128.const f64x2 1.5 -1.5))
  (v128.const f64x2 1 -2))
(assert_return (invoke "f64x2.floor" (v128.const f64x2 4.2 -4.2))
  (v128.const f64x2 4 -5))
(assert_return (invoke "f64x2.trunc" (v128.const f64x2 -0 0))
  (v128.const f64x2 -0 0))
(assert_return (invoke "f64x2.trunc" (v128.const f64x2 infinity -infinity))
  (v128.const f64x2 infinity -infinity))
(assert_return (invoke "f64x2.trunc" (v128.const f64x2 nan 42))
  (v128.const f64x2 nan 42))
(assert_return (invoke "f64x2.trunc" (v128.const f64x2 0.5 -0.5))
  (v128.const f64x2 0 -0))
(assert_return (invoke "f64x2.trunc" (v128.const f64x2 1.5 -1.5))
  (v128.const f64x2 1 -1))
(assert_return (invoke "f64x2.trunc" (v128.const f64x2 4.2 -4.2))
  (v128.const f64x2 4 -4))
(assert_return (invoke "f64x2.nearest" (v128.const f64x2 -0 0))
  (v128.const f64x2 -0 0))
(assert_return (invoke "f64x2.nearest" (v128.const f64x2 infinity -infinity))
  (v128.const f64x2 infinity -infinity))
(assert_return (invoke "f64x2.nearest" (v128.const f64x2 nan 42))
  (v128.const f64x2 nan 42))
(assert_return (invoke "f64x2.nearest" (v128.const f64x2 0.5 -0.5))
  (v128.const f64x2 0 -0))
(assert_return (invoke "f64x2.nearest" (v128.const f64x2 1.5 -1.5))
  (v128.const f64x2 2 -2))
(assert_return (invoke "f64x2.nearest" (v128.const f64x2 4.2 -4.2))
  (v128.const f64x2 4 -4))

;; conversions

;; Saturating float -> int truncation: nan lanes become 0 and +/-infinity
;; clamp to the extreme values of the destination integer type (0 is the
;; lower bound for the unsigned variants).
(assert_return
  (invoke "i32x4.trunc_sat_f32x4_s"
    (v128.const f32x4 42 nan infinity -infinity)
  )
  (v128.const i32x4 42 0 2147483647 -2147483648)
)
(assert_return
  (invoke "i32x4.trunc_sat_f32x4_u"
    (v128.const f32x4 42 nan infinity -infinity)
  )
  (v128.const i32x4 42 0 4294967295 0)
)
(assert_return
  (invoke "i64x2.trunc_sat_f64x2_s"
    (v128.const f64x2 42 nan)
  )
  (v128.const i64x2 42 0)
)
(assert_return
  (invoke "i64x2.trunc_sat_f64x2_s"
    (v128.const f64x2 infinity -infinity)
  )
  (v128.const i64x2 9223372036854775807 -9223372036854775808)
)
(assert_return
  (invoke "i64x2.trunc_sat_f64x2_u"
    (v128.const f64x2 42 nan)
  )
  (v128.const i64x2 42 0)
)
(assert_return
  (invoke "i64x2.trunc_sat_f64x2_u"
    (v128.const f64x2 infinity -infinity)
  )
  (v128.const i64x2 18446744073709551615 0)
)

;; int -> float conversion. The same integer bit patterns are fed to the
;; signed and unsigned variants; the unsigned ones read -1 and the minimum
;; value as large positive numbers. Integers that the float type cannot hold
;; exactly are expected to round (e.g. 2147483647 -> 2147483648 as f32).
(assert_return
  (invoke "f32x4.convert_i32x4_s"
    (v128.const i32x4 0 -1 2147483647 -2147483648)
  )
  (v128.const f32x4 0 -1 2147483648 -2147483648)
)
(assert_return
  (invoke "f32x4.convert_i32x4_u"
    (v128.const i32x4 0 -1 2147483647 -2147483648)
  )
  (v128.const f32x4 0 4294967296 2147483648 2147483648)
)
(assert_return
  (invoke "f64x2.convert_i64x2_s"
    (v128.const i64x2 0 -1)
  )
  (v128.const f64x2 0 -1)
)
;; NOTE(review): 9223372036854775807 is not exactly representable as an f64;
;; the expected literal presumably parses to 9223372036854775808 - confirm
;; against the parser in use.
(assert_return
  (invoke "f64x2.convert_i64x2_s"
    (v128.const i64x2 9223372036854775807 -9223372036854775808)
  )
  (v128.const f64x2 9223372036854775807 -9223372036854775808)
)
(assert_return
  (invoke "f64x2.convert_i64x2_u"
    (v128.const i64x2 0 -1)
  )
  (v128.const f64x2 0 18446744073709551616)
)
(assert_return
  (invoke "f64x2.convert_i64x2_u"
    (v128.const i64x2 9223372036854775807 -9223372036854775808)
  )
  (v128.const f64x2 9223372036854775807 9223372036854775808)
)
;; Narrowing: the lanes of the first operand fill the low half of the result
;; and the lanes of the second operand fill the high half. Values outside the
;; destination range saturate; the _s and _u variants are given identical
;; inputs so only the clamping bounds differ.

;; Signed narrowing to i8: clamps to [-128, 127] (129 -> 127, -32767 -> -128).
(assert_return
  (invoke "i8x16.narrow_i16x8_s"
    (v128.const i16x8 129 127 -32767 32767 -32768 -1 1 0)
    (v128.const i16x8 0 1 -1 -32768 32767 -32767 127 129)
  )
  (v128.const i8x16 127 127 -128 127 -128 -1 1 0 0 1 -1 -128 127 -128 127 127)
)
;; Unsigned narrowing to i8: clamps to [0, 255]; negative inputs (including
;; -1) are expected to give 0, so the source lanes are read as signed.
(assert_return
  (invoke "i8x16.narrow_i16x8_u"
    (v128.const i16x8 129 127 -32767 32767 -32768 -1 1 0)
    (v128.const i16x8 0 1 -1 -32768 32767 -32767 127 129)
  )
  (v128.const i8x16 129 127 0 255 0 0 1 0 0 1 0 0 255 0 127 129)
)
;; Signed narrowing to i16: clamps to [-32768, 32767].
(assert_return
  (invoke "i16x8.narrow_i32x4_s"
    (v128.const i32x4 32769 32767 -2147483647 2147483647)
    (v128.const i32x4 0 1 -1 -2147483648)
  )
  (v128.const i16x8 32767 32767 -32768 32767 0 1 -1 -32768)
)
;; Unsigned narrowing to i16: clamps to [0, 65535]; negative inputs give 0.
(assert_return
  (invoke "i16x8.narrow_i32x4_u"
    (v128.const i32x4 32769 32767 -2147483647 2147483647)
    (v128.const i32x4 0 1 -1 -2147483648)
  )
  (v128.const i16x8 32769 32767 0 65535 0 1 0 0)
)
;; Widening i8x16 -> i16x8. All four asserts share one input vector whose
;; high eight lanes are the low eight lanes in reverse order. "low" reads
;; lanes 0-7, "high" reads lanes 8-15.

;; _s sign-extends: the lane written as 129 is expected back as -127.
(assert_return
  (invoke "i16x8.widen_low_i8x16_s"
    (v128.const i8x16 0 1 -1 -128 127 129 64 -64 -64 64 129 127 -128 -1 1 0)
  )
  (v128.const i16x8 0 1 -1 -128 127 -127 64 -64)
)
(assert_return
  (invoke "i16x8.widen_high_i8x16_s"
    (v128.const i8x16 0 1 -1 -128 127 129 64 -64 -64 64 129 127 -128 -1 1 0)
  )
  (v128.const i16x8 -64 64 -127 127 -128 -1 1 0)
)
;; _u zero-extends: -1 is expected back as 255, -128 as 128, -64 as 192.
(assert_return
  (invoke "i16x8.widen_low_i8x16_u"
    (v128.const i8x16 0 1 -1 -128 127 129 64 -64 -64 64 129 127 -128 -1 1 0)
  )
  (v128.const i16x8 0 1 255 128 127 129 64 192)
)
(assert_return
  (invoke "i16x8.widen_high_i8x16_u"
    (v128.const i8x16 0 1 -1 -128 127 129 64 -64 -64 64 129 127 -128 -1 1 0)
  )
  (v128.const i16x8 192 64 129 127 128 255 1 0)
)
;; Widening i16x8 -> i32x4 on one shared input vector: "low" reads lanes 0-3,
;; "high" reads lanes 4-7. The _s variants sign-extend (the lane written as
;; 32768 is expected back as -32768), the _u variants zero-extend
;; (-1 -> 65535, -16384 -> 49152).
(assert_return
  (invoke "i32x4.widen_low_i16x8_s"
    (v128.const i16x8 0 1 -1 32768 32767 32769 16384 -16384)
  )
  (v128.const i32x4 0 1 -1 -32768)
)
(assert_return
  (invoke "i32x4.widen_high_i16x8_s"
    (v128.const i16x8 0 1 -1 32768 32767 32769 16384 -16384)
  )
  (v128.const i32x4 32767 -32767 16384 -16384)
)
(assert_return
  (invoke "i32x4.widen_low_i16x8_u"
    (v128.const i16x8 0 1 -1 32768 32767 32769 16384 -16384)
  )
  (v128.const i32x4 0 1 65535 32768)
)
(assert_return
  (invoke "i32x4.widen_high_i16x8_u"
    (v128.const i16x8 0 1 -1 32768 32767 32769 16384 -16384)
  )
  (v128.const i32x4 32767 32769 16384 49152)
)
;; Extending loads. All of these read from address 256, where the module's
;; data segment places the eight bytes 80 90 a0 b0 c0 d0 e0 f0. Every byte has
;; its top bit set, so the _s variants fill the upper half of each lane with
;; ones and the _u variants fill it with zeros. Multi-byte source elements are
;; assembled low byte first (80 90 -> 0x9080).
(assert_return
  (invoke "i16x8.load8x8_s"
    (i32.const 256)
  )
  (v128.const i16x8 0xff80 0xff90 0xffa0 0xffb0 0xffc0 0xffd0 0xffe0 0xfff0)
)
(assert_return
  (invoke "i16x8.load8x8_u"
    (i32.const 256)
  )
  (v128.const i16x8 0x0080 0x0090 0x00a0 0x00b0 0x00c0 0x00d0 0x00e0 0x00f0)
)
(assert_return
  (invoke "i32x4.load16x4_s"
    (i32.const 256)
  )
  (v128.const i32x4 0xffff9080 0xffffb0a0 0xffffd0c0 0xfffff0e0)
)
(assert_return
  (invoke "i32x4.load16x4_u"
    (i32.const 256)
  )
  (v128.const i32x4 0x00009080 0x0000b0a0 0x0000d0c0 0x0000f0e0)
)
(assert_return
  (invoke "i64x2.load32x2_s"
    (i32.const 256)
  )
  (v128.const i64x2 0xffffffffb0a09080 0xfffffffff0e0d0c0)
)
(assert_return
  (invoke "i64x2.load32x2_u"
    (i32.const 256)
  )
  (v128.const i64x2 0x00000000b0a09080 0x00000000f0e0d0c0)
)

;; Zero-filling loads: the loaded 32 or 64 bits land in lane 0 and every
;; remaining lane of the result is expected to be 0.
(assert_return
  (invoke "v128.load32_zero"
    (i32.const 256)
  )
  (v128.const i32x4 0xb0a09080 0 0 0)
)
(assert_return
  (invoke "v128.load64_zero"
    (i32.const 256)
  )
  (v128.const i64x2 0xf0e0d0c0b0a09080 0)
)
;; Swizzle: each lane of the second operand is an index into the first
;; operand (bytes 0xf0..0xff, so source lane i holds 0xf0 + i). Indices 0-15
;; select that byte; every index of 16 or more used here (16, 17, 127, 128,
;; 129, 255) is expected to produce 0x00 rather than wrapping around.
(assert_return
  (invoke "v8x16.swizzle"
    (v128.const i8x16 0xf0 0xf1 0xf2 0xf3 0xf4 0xf5 0xf6 0xf7 0xf8 0xf9 0xfa 0xfb 0xfc 0xfd 0xfe 0xff)
    (v128.const i8x16 0 4 8 12 16 255 129 128 127 17 15 13 12 8 4 0)
  )
  (v128.const i8x16 0xf0 0xf4 0xf8 0xfc 0x00 0x00 0x00 0x00 0x00 0x00 0xff 0xfd 0xfc 0xf8 0xf4 0xf0)
)
